summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/amd/amdgpu
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile300
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ObjectID.h745
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c426
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1511
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c648
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c1518
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c868
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h479
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c194
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c422
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c188
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c549
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c1055
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c684
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c812
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c581
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c602
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c1177
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c3026
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c1888
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h219
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c945
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c680
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c260
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c517
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c309
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c474
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c2033
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1786
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c136
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c994
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c2213
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c6052
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2635
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c1731
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c501
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h369
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c239
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2922
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c240
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c115
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c1006
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c438
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c1007
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c1311
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h566
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c938
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h422
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c324
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c251
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c394
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c494
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c604
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c300
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c756
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c357
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c1756
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c1478
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h453
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h618
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c1642
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h412
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c359
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c680
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c3827
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h539
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c395
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c3334
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h770
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c1420
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h152
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h168
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c171
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h129
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c713
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h451
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c577
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c551
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h275
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c346
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h175
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c183
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c460
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h563
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c2541
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h213
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c1345
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h566
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c339
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c1376
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h93
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c1185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c1263
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h429
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c1095
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h364
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c670
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c2735
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h557
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c1108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c308
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c948
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c415
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h184
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c1125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h332
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c658
-rw-r--r--drivers/gpu/drm/amd/amdgpu/arct_reg_init.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c130
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c1591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h170
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c885
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c770
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c2128
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.h78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c176
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c2321
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c451
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c1385
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h607
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_ci.h944
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_defs.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h975
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h997
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h942
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_si.h941
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_vi.h944
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c451
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c3660
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3799
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c3484
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c3581
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c128
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c685
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/emu_soc.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c9547
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c6409
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c3611
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c5172
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c7230
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c7269
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c1019
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c1940
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c4421
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c453
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c656
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c485
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c701
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c514
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c502
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c1259
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c1074
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c1149
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c1400
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c1766
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c2509
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c174
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c223
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c195
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h2172
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c769
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c769
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c385
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c614
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c816
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c832
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c615
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c841
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c1218
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.c104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mca_v3_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c1187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c1340
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c799
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c1346
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c844
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c718
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c629
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c665
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c571
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c1687
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h154
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h338
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c420
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c440
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c638
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c736
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h4822
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c580
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c622
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c416
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c300
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c432
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c812
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c348
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c736
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c1138
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h470
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h466
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c173
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c667
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c181
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c355
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c782
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c352
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c382
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_common.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c1274
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c1721
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c2645
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c274
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c2200
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c1920
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c1861
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c1711
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h5664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c2789
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c855
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_enums.h291
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c313
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h2494
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c312
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c797
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c303
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c1475
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h207
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h434
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c896
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_rap_if.h84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h170
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h155
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h143
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c502
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h2245
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c462
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.h52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c528
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c466
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.h75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.c446
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_7.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c824
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c813
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c921
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c1687
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c1917
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c683
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c1023
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c1162
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c2074
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c2106
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1995
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c2237
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c2166
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c1767
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c663
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h3335
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c720
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c2221
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h514
493 files changed, 300805 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
new file mode 100644
index 000000000..22d88f8ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: MIT
+
+config DRM_AMDGPU
+ tristate "AMD GPU"
+ depends on DRM && PCI && MMU
+ depends on !UML
+ select FW_LOADER
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_HDMI_HELPER
+ select DRM_DISPLAY_HDCP_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_HELPER
+ select DRM_SCHED
+ select DRM_TTM
+ select DRM_TTM_HELPER
+ select POWER_SUPPLY
+ select HWMON
+ select I2C
+ select I2C_ALGOBIT
+ select BACKLIGHT_CLASS_DEVICE
+ select INTERVAL_TREE
+ select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
+ select DRM_EXEC
+ # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
+ # ACPI_VIDEO's dependencies must also be selected.
+ select INPUT if ACPI
+ select ACPI_VIDEO if ACPI
+ # On x86 ACPI_VIDEO also needs ACPI_WMI
+ select X86_PLATFORM_DEVICES if ACPI && X86
+ select ACPI_WMI if ACPI && X86
+ help
+ Choose this option if you have a recent AMD Radeon graphics card.
+
+ If M is selected, the module will be called amdgpu.
+
+config DRM_AMDGPU_SI
+ bool "Enable amdgpu support for SI parts"
+ depends on DRM_AMDGPU
+ help
+ Choose this option if you want to enable experimental support
+ for SI (Southern Islands) asics.
+
+ SI is already supported in radeon. Experimental support for SI
+ in amdgpu will be disabled by default and is still provided by
+ radeon. Use module options to override this:
+
+ radeon.si_support=0 amdgpu.si_support=1
+
+config DRM_AMDGPU_CIK
+ bool "Enable amdgpu support for CIK parts"
+ depends on DRM_AMDGPU
+ help
+ Choose this option if you want to enable support for CIK (Sea
+ Islands) asics.
+
+ CIK is already supported in radeon. Support for CIK in amdgpu
+ will be disabled by default and is still provided by radeon.
+ Use module options to override this:
+
+ radeon.cik_support=0 amdgpu.cik_support=1
+
+config DRM_AMDGPU_USERPTR
+ bool "Always enable userptr write support"
+ depends on DRM_AMDGPU
+ depends on MMU
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
+
+config DRM_AMDGPU_WERROR
+ bool "Force the compiler to throw an error instead of a warning when compiling"
+ depends on DRM_AMDGPU
+ depends on EXPERT
+ depends on !COMPILE_TEST
+ default n
+ help
+ Add -Werror to the build flags for amdgpu.ko.
+ Only enable this if you are warning code for amdgpu.ko.
+
+source "drivers/gpu/drm/amd/acp/Kconfig"
+source "drivers/gpu/drm/amd/display/Kconfig"
+source "drivers/gpu/drm/amd/amdkfd/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
new file mode 100644
index 000000000..384b798a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -0,0 +1,300 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the drm device driver. This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+FULL_AMD_PATH=$(srctree)/$(src)/..
+DISPLAY_FOLDER_NAME=display
+FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
+
+ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
+ -I$(FULL_AMD_PATH)/include \
+ -I$(FULL_AMD_PATH)/amdgpu \
+ -I$(FULL_AMD_PATH)/pm/inc \
+ -I$(FULL_AMD_PATH)/acp/include \
+ -I$(FULL_AMD_DISPLAY_PATH) \
+ -I$(FULL_AMD_DISPLAY_PATH)/include \
+ -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
+ -I$(FULL_AMD_DISPLAY_PATH)/dc \
+ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
+ -I$(FULL_AMD_PATH)/amdkfd
+
+subdir-ccflags-y := -Wextra
+subdir-ccflags-y += -Wunused
+subdir-ccflags-y += -Wmissing-prototypes
+subdir-ccflags-y += -Wmissing-declarations
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-format-attribute
+# Need this to avoid recursive variable evaluation issues
+cond-flags := $(call cc-option, -Wunused-but-set-variable) \
+ $(call cc-option, -Wunused-const-variable) \
+ $(call cc-option, -Wstringop-truncation) \
+ $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(cond-flags)
+subdir-ccflags-y += -Wno-unused-parameter
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-sign-compare
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-override-init
+subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
+
+amdgpu-y := amdgpu_drv.o
+
+# add KMS driver
+amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
+ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
+ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
+ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
+ amdgpu_gem.o amdgpu_ring.o \
+ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
+ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
+ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
+ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
+ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
+ amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o amdgpu_hdp.o \
+ amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
+ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o \
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+ amdgpu_ring_mux.o amdgpu_xcp.o
+
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
+amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
+
+# add asic specific block
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \
+ dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
+
+amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \
+ uvd_v3_1.o
+
+amdgpu-y += \
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
+ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
+ nbio_v7_9.o aqua_vanjaram.o
+
+# add DF block
+amdgpu-y += \
+ df_v1_7.o \
+ df_v3_6.o \
+ df_v4_3.o
+
+# add GMC block
+amdgpu-y += \
+ gmc_v7_0.o \
+ gmc_v8_0.o \
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
+ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
+ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o
+
+# add UMC block
+amdgpu-y += \
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
+
+# add IH block
+amdgpu-y += \
+ amdgpu_irq.o \
+ amdgpu_ih.o \
+ iceland_ih.o \
+ tonga_ih.o \
+ cz_ih.o \
+ vega10_ih.o \
+ vega20_ih.o \
+ navi10_ih.o \
+ ih_v6_0.o \
+ ih_v6_1.o
+
+# add PSP block
+amdgpu-y += \
+ amdgpu_psp.o \
+ psp_v3_1.o \
+ psp_v10_0.o \
+ psp_v11_0.o \
+ psp_v11_0_8.o \
+ psp_v12_0.o \
+ psp_v13_0.o \
+ psp_v13_0_4.o
+
+# add DCE block
+amdgpu-y += \
+ dce_v10_0.o \
+ dce_v11_0.o \
+ amdgpu_vkms.o
+
+# add GFX block
+amdgpu-y += \
+ amdgpu_gfx.o \
+ amdgpu_rlc.o \
+ gfx_v8_0.o \
+ gfx_v9_0.o \
+ gfx_v9_4.o \
+ gfx_v9_4_2.o \
+ gfx_v9_4_3.o \
+ gfx_v10_0.o \
+ imu_v11_0.o \
+ gfx_v11_0.o \
+ gfx_v11_0_3.o \
+ imu_v11_0_3.o
+
+# add async DMA block
+amdgpu-y += \
+ amdgpu_sdma.o \
+ sdma_v2_4.o \
+ sdma_v3_0.o \
+ sdma_v4_0.o \
+ sdma_v4_4.o \
+ sdma_v4_4_2.o \
+ sdma_v5_0.o \
+ sdma_v5_2.o \
+ sdma_v6_0.o
+
+# add MES block
+amdgpu-y += \
+ amdgpu_mes.o \
+ mes_v10_1.o \
+ mes_v11_0.o
+
+# add UVD block
+amdgpu-y += \
+ amdgpu_uvd.o \
+ uvd_v5_0.o \
+ uvd_v6_0.o \
+ uvd_v7_0.o
+
+# add VCE block
+amdgpu-y += \
+ amdgpu_vce.o \
+ vce_v3_0.o \
+ vce_v4_0.o
+
+# add VCN and JPEG block
+amdgpu-y += \
+ amdgpu_vcn.o \
+ vcn_sw_ring.o \
+ vcn_v1_0.o \
+ vcn_v2_0.o \
+ vcn_v2_5.o \
+ vcn_v3_0.o \
+ vcn_v4_0.o \
+ vcn_v4_0_3.o \
+ amdgpu_jpeg.o \
+ jpeg_v1_0.o \
+ jpeg_v2_0.o \
+ jpeg_v2_5.o \
+ jpeg_v3_0.o \
+ jpeg_v4_0.o \
+ jpeg_v4_0_3.o
+
+# add ATHUB block
+amdgpu-y += \
+ athub_v1_0.o \
+ athub_v2_0.o \
+ athub_v2_1.o \
+ athub_v3_0.o
+
+# add SMUIO block
+amdgpu-y += \
+ smuio_v9_0.o \
+ smuio_v11_0.o \
+ smuio_v11_0_6.o \
+ smuio_v13_0.o \
+ smuio_v13_0_3.o \
+ smuio_v13_0_6.o
+
+# add reset block
+amdgpu-y += \
+ amdgpu_reset.o
+
+# add MCA block
+amdgpu-y += \
+ mca_v3_0.o
+
+# add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+
+ifneq ($(CONFIG_HSA_AMD),)
+AMDKFD_PATH := ../amdkfd
+include $(FULL_AMD_PATH)/amdkfd/Makefile
+amdgpu-y += $(AMDKFD_FILES)
+amdgpu-y += \
+ amdgpu_amdkfd_fence.o \
+ amdgpu_amdkfd_gpuvm.o \
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o \
+ amdgpu_amdkfd_arcturus.o \
+ amdgpu_amdkfd_aldebaran.o \
+ amdgpu_amdkfd_gc_9_4_3.o \
+ amdgpu_amdkfd_gfx_v10.o \
+ amdgpu_amdkfd_gfx_v10_3.o \
+ amdgpu_amdkfd_gfx_v11.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
+
+# add cgs
+amdgpu-y += amdgpu_cgs.o
+
+# GPU scheduler
+amdgpu-y += amdgpu_job.o
+
+# ACP componet
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
+
+amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
+amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
+amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
+
+include $(FULL_AMD_PATH)/pm/Makefile
+
+amdgpu-y += $(AMD_POWERPLAY_FILES)
+
+ifneq ($(CONFIG_DRM_AMD_DC),)
+
+RELATIVE_AMD_DISPLAY_PATH = ../$(DISPLAY_FOLDER_NAME)
+include $(FULL_AMD_DISPLAY_PATH)/Makefile
+
+amdgpu-y += $(AMD_DISPLAY_FILES)
+
+endif
+
+obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
new file mode 100644
index 000000000..a0f0a17e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -0,0 +1,745 @@
+/*
+* Copyright 2006-2007 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+/* based on stg/asic_reg/drivers/inc/asic_reg/ObjectID.h ver 23 */
+
+#ifndef _OBJECTID_H
+#define _OBJECTID_H
+
+#if defined(_X86_)
+#pragma pack(1)
+#endif
+
+/****************************************************/
+/* Graphics Object Type Definition */
+/****************************************************/
+#define GRAPH_OBJECT_TYPE_NONE 0x0
+#define GRAPH_OBJECT_TYPE_GPU 0x1
+#define GRAPH_OBJECT_TYPE_ENCODER 0x2
+#define GRAPH_OBJECT_TYPE_CONNECTOR 0x3
+#define GRAPH_OBJECT_TYPE_ROUTER 0x4
+/* deleted */
+#define GRAPH_OBJECT_TYPE_DISPLAY_PATH 0x6
+#define GRAPH_OBJECT_TYPE_GENERIC 0x7
+
+/****************************************************/
+/* Encoder Object ID Definition */
+/****************************************************/
+#define ENCODER_OBJECT_ID_NONE 0x00
+
+/* Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_INTERNAL_LVDS 0x01
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS1 0x02
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS2 0x03
+#define ENCODER_OBJECT_ID_INTERNAL_DAC1 0x04
+#define ENCODER_OBJECT_ID_INTERNAL_DAC2 0x05 /* TV/CV DAC */
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOA 0x06
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOB 0x07
+
+/* External Third Party Encoders */
+#define ENCODER_OBJECT_ID_SI170B 0x08
+#define ENCODER_OBJECT_ID_CH7303 0x09
+#define ENCODER_OBJECT_ID_CH7301 0x0A
+#define ENCODER_OBJECT_ID_INTERNAL_DVO1 0x0B /* This belongs to Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOA 0x0C
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOB 0x0D
+#define ENCODER_OBJECT_ID_TITFP513 0x0E
+#define ENCODER_OBJECT_ID_INTERNAL_LVTM1 0x0F /* not used for Radeon */
+#define ENCODER_OBJECT_ID_VT1623 0x10
+#define ENCODER_OBJECT_ID_HDMI_SI1930 0x11
+#define ENCODER_OBJECT_ID_HDMI_INTERNAL 0x12
+#define ENCODER_OBJECT_ID_ALMOND 0x22
+#define ENCODER_OBJECT_ID_TRAVIS 0x23
+#define ENCODER_OBJECT_ID_NUTMEG 0x22
+#define ENCODER_OBJECT_ID_HDMI_ANX9805 0x26
+
+/* Kaleidoscope (KLDSCP) Class Display Hardware (internal) */
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 0x13
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1 0x14
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1 0x15
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2 0x16 /* Shared with CV/TV and CRT */
+#define ENCODER_OBJECT_ID_SI178 0X17 /* External TMDS (dual link, no HDCP.) */
+#define ENCODER_OBJECT_ID_MVPU_FPGA 0x18 /* MVPU FPGA chip */
+#define ENCODER_OBJECT_ID_INTERNAL_DDI 0x19
+#define ENCODER_OBJECT_ID_VT1625 0x1A
+#define ENCODER_OBJECT_ID_HDMI_SI1932 0x1B
+#define ENCODER_OBJECT_ID_DP_AN9801 0x1C
+#define ENCODER_OBJECT_ID_DP_DP501 0x1D
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY 0x1E
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA 0x1F
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 0x20
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 0x21
+#define ENCODER_OBJECT_ID_INTERNAL_VCE 0x24
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 0x25
+#define ENCODER_OBJECT_ID_INTERNAL_AMCLK 0x27
+
+#define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO 0xFF
+
+/****************************************************/
+/* Connector Object ID Definition */
+/****************************************************/
+#define CONNECTOR_OBJECT_ID_NONE 0x00
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I 0x01
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I 0x02
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D 0x03
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D 0x04
+#define CONNECTOR_OBJECT_ID_VGA 0x05
+#define CONNECTOR_OBJECT_ID_COMPOSITE 0x06
+#define CONNECTOR_OBJECT_ID_SVIDEO 0x07
+#define CONNECTOR_OBJECT_ID_YPbPr 0x08
+#define CONNECTOR_OBJECT_ID_D_CONNECTOR 0x09
+#define CONNECTOR_OBJECT_ID_9PIN_DIN 0x0A /* Supports both CV & TV */
+#define CONNECTOR_OBJECT_ID_SCART 0x0B
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_A 0x0C
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_B 0x0D
+#define CONNECTOR_OBJECT_ID_LVDS 0x0E
+#define CONNECTOR_OBJECT_ID_7PIN_DIN 0x0F
+#define CONNECTOR_OBJECT_ID_PCIE_CONNECTOR 0x10
+#define CONNECTOR_OBJECT_ID_CROSSFIRE 0x11
+#define CONNECTOR_OBJECT_ID_HARDCODE_DVI 0x12
+#define CONNECTOR_OBJECT_ID_DISPLAYPORT 0x13
+#define CONNECTOR_OBJECT_ID_eDP 0x14
+#define CONNECTOR_OBJECT_ID_MXM 0x15
+#define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16
+#define CONNECTOR_OBJECT_ID_USBC 0x17
+
+/* deleted */
+
+/****************************************************/
+/* Router Object ID Definition */
+/****************************************************/
+#define ROUTER_OBJECT_ID_NONE 0x00
+#define ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL 0x01
+
+/****************************************************/
+/* Generic Object ID Definition */
+/****************************************************/
+#define GENERIC_OBJECT_ID_NONE 0x00
+#define GENERIC_OBJECT_ID_GLSYNC 0x01
+#define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02
+#define GENERIC_OBJECT_ID_MXM_OPM 0x03
+#define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin
+#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
+
+/****************************************************/
+/* Graphics Object ENUM ID Definition */
+/****************************************************/
+#define GRAPH_OBJECT_ENUM_ID1 0x01
+#define GRAPH_OBJECT_ENUM_ID2 0x02
+#define GRAPH_OBJECT_ENUM_ID3 0x03
+#define GRAPH_OBJECT_ENUM_ID4 0x04
+#define GRAPH_OBJECT_ENUM_ID5 0x05
+#define GRAPH_OBJECT_ENUM_ID6 0x06
+#define GRAPH_OBJECT_ENUM_ID7 0x07
+
+/****************************************************/
+/* Graphics Object ID Bit definition */
+/****************************************************/
+#define OBJECT_ID_MASK 0x00FF
+#define ENUM_ID_MASK 0x0700
+#define RESERVED1_ID_MASK 0x0800
+#define OBJECT_TYPE_MASK 0x7000
+#define RESERVED2_ID_MASK 0x8000
+
+#define OBJECT_ID_SHIFT 0x00
+#define ENUM_ID_SHIFT 0x08
+#define OBJECT_TYPE_SHIFT 0x0C
+
+
+/****************************************************/
+/* Graphics Object family definition */
+/****************************************************/
+#define CONSTRUCTOBJECTFAMILYID(GRAPHICS_OBJECT_TYPE, GRAPHICS_OBJECT_ID) (GRAPHICS_OBJECT_TYPE << OBJECT_TYPE_SHIFT | \
+ GRAPHICS_OBJECT_ID << OBJECT_ID_SHIFT)
+/****************************************************/
+/* GPU Object ID definition - Shared with BIOS */
+/****************************************************/
+#define GPU_ENUM_ID1 ( GRAPH_OBJECT_TYPE_GPU << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT)
+
+/****************************************************/
+/* Encoder Object ID definition - Shared with BIOS */
+/****************************************************/
+/*
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1 0x2101
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1 0x2102
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1 0x2103
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1 0x2104
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1 0x2105
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1 0x2106
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1 0x2107
+#define ENCODER_SIL170B_ENUM_ID1 0x2108
+#define ENCODER_CH7303_ENUM_ID1 0x2109
+#define ENCODER_CH7301_ENUM_ID1 0x210A
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1 0x210B
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1 0x210C
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1 0x210D
+#define ENCODER_TITFP513_ENUM_ID1 0x210E
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1 0x210F
+#define ENCODER_VT1623_ENUM_ID1 0x2110
+#define ENCODER_HDMI_SI1930_ENUM_ID1 0x2111
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1 0x2112
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1 0x2113
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1 0x2114
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1 0x2115
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1 0x2116
+#define ENCODER_SI178_ENUM_ID1 0x2117
+#define ENCODER_MVPU_FPGA_ENUM_ID1 0x2118
+#define ENCODER_INTERNAL_DDI_ENUM_ID1 0x2119
+#define ENCODER_VT1625_ENUM_ID1 0x211A
+#define ENCODER_HDMI_SI1932_ENUM_ID1 0x211B
+#define ENCODER_ENCODER_DP_AN9801_ENUM_ID1 0x211C
+#define ENCODER_DP_DP501_ENUM_ID1 0x211D
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1 0x211E
+*/
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_LVDS << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_TMDS1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_TMDS2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DAC2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+#define ENCODER_SIL170B_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_SI170B << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7303_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_CH7303 << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7301_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_CH7301 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_TITFP513_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TITFP513 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_LVTM1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1623_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_VT1623 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1930_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_SI1930 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_INTERNAL << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2 << OBJECT_ID_SHIFT) // Shared with CV/TV and CRT
+
+#define ENCODER_SI178_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_SI178 << OBJECT_ID_SHIFT)
+
+#define ENCODER_MVPU_FPGA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_MVPU_FPGA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DDI_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DDI << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1625_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_VT1625 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1932_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_SI1932 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_DP501_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_DP_DP501 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_AN9801_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_DP_AN9801 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_LVTMA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY3_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY3_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 << OBJECT_ID_SHIFT)
+
+#define ENCODER_GENERAL_EXTERNAL_DVO_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO << OBJECT_ID_SHIFT)
+
+#define ENCODER_ALMOND_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_ALMOND << OBJECT_ID_SHIFT)
+
+#define ENCODER_ALMOND_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_ALMOND << OBJECT_ID_SHIFT)
+
+#define ENCODER_TRAVIS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TRAVIS << OBJECT_ID_SHIFT)
+
+#define ENCODER_TRAVIS_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TRAVIS << OBJECT_ID_SHIFT)
+
+#define ENCODER_NUTMEG_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_NUTMEG << OBJECT_ID_SHIFT)
+
+#define ENCODER_VCE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_VCE << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_ANX9805_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_ANX9805 << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Connector Object ID definition - Shared with BIOS */
+/****************************************************/
+/*
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1 0x3101
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1 0x3102
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1 0x3103
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1 0x3104
+#define CONNECTOR_VGA_ENUM_ID1 0x3105
+#define CONNECTOR_COMPOSITE_ENUM_ID1 0x3106
+#define CONNECTOR_SVIDEO_ENUM_ID1 0x3107
+#define CONNECTOR_YPbPr_ENUM_ID1 0x3108
+#define CONNECTOR_D_CONNECTORE_ENUM_ID1 0x3109
+#define CONNECTOR_9PIN_DIN_ENUM_ID1 0x310A
+#define CONNECTOR_SCART_ENUM_ID1 0x310B
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1 0x310C
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1 0x310D
+#define CONNECTOR_LVDS_ENUM_ID1 0x310E
+#define CONNECTOR_7PIN_DIN_ENUM_ID1 0x310F
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1 0x3110
+*/
+#define CONNECTOR_LVDS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_LVDS_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_eDP_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_eDP_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_COMPOSITE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_COMPOSITE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_COMPOSITE_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_COMPOSITE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SVIDEO_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SVIDEO << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SVIDEO_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SVIDEO << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_YPbPr_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_YPbPr << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_YPbPr_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_YPbPr << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_D_CONNECTOR_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_D_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_D_CONNECTOR_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_D_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_9PIN_DIN_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_9PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_9PIN_DIN_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_9PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SCART_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SCART << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SCART_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SCART << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_B << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_B << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_7PIN_DIN_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_7PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_7PIN_DIN_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_7PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_MXM_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_A
+
+#define CONNECTOR_MXM_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_B
+
+#define CONNECTOR_MXM_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_C
+
+#define CONNECTOR_MXM_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_D
+
+#define CONNECTOR_MXM_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_LVDS_TXxx
+
+#define CONNECTOR_MXM_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_LVDS_UXxx
+
+#define CONNECTOR_MXM_ENUM_ID7 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID7 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DAC
+
+#define CONNECTOR_LVDS_eDP_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_LVDS_eDP_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS_eDP << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Router Object ID definition - Shared with BIOS */
+/****************************************************/
+#define ROUTER_I2C_EXTENDER_CNTL_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ROUTER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL << OBJECT_ID_SHIFT)
+
+/* deleted */
+
+/****************************************************/
+/* Generic Object ID definition - Shared with BIOS */
+/****************************************************/
+#define GENERICOBJECT_GLSYNC_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_GLSYNC << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_PX2_NON_DRIVABLE_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_PX2_NON_DRIVABLE<< OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_PX2_NON_DRIVABLE_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_PX2_NON_DRIVABLE<< OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_MXM_OPM_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_MXM_OPM << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_STEREO_PIN_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+/****************************************************/
+/* Object Cap definition - Shared with BIOS */
+/****************************************************/
+#define GRAPHICS_OBJECT_CAP_I2C 0x00000001L
+#define GRAPHICS_OBJECT_CAP_TABLE_ID 0x00000002L
+
+
+#define GRAPHICS_OBJECT_I2CCOMMAND_TABLE_ID 0x01
+#define GRAPHICS_OBJECT_HOTPLUGDETECTIONINTERUPT_TABLE_ID 0x02
+#define GRAPHICS_OBJECT_ENCODER_OUTPUT_PROTECTION_TABLE_ID 0x03
+
+#if defined(_X86_)
+#pragma pack()
+#endif
+
+#endif /*GRAPHICTYPE */
+
+
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
new file mode 100644
index 000000000..2b97b8a96
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "aldebaran.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return true;
+
+ return false;
+}
+
+static struct amdgpu_reset_handler *
+aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ dev_dbg(adev->dev, "Getting reset handler for method %d\n",
+ reset_context->method);
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ if (aldebaran_is_mode2_default(reset_ctl)) {
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
+ reset_context->method = AMD_RESET_METHOD_MODE2;
+ return handler;
+ }
+ }
+ }
+
+ dev_dbg(adev->dev, "Reset handler not found!\n");
+
+ return NULL;
+}
+
+static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+
+ if (r) {
+ dev_err(adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ return r;
+}
+
+static int
+aldebaran_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ dev_dbg(adev->dev, "Aldebaran prepare hw context\n");
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+ if (!amdgpu_sriov_vf(adev))
+ r = aldebaran_mode2_suspend_ip(adev);
+
+ return r;
+}
+
+static void aldebaran_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+static int aldebaran_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ adev->asic_reset_res = amdgpu_dpm_mode2_reset(adev);
+ return adev->asic_reset_res;
+}
+
+static int
+aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r = 0;
+
+ dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
+ }
+ /*
+ * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
+ * them together so that they can be completed asynchronously on multiple nodes
+ */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->reset_cntl->reset_work))
+ r = -EALREADY;
+ } else
+ r = aldebaran_mode2_reset(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->reset_cntl->reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_ip_block *cmn_block;
+ int ucode_count = 0;
+ int i, r;
+
+ dev_dbg(adev->dev, "Reloading ucodes after reset\n");
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (!ucode->fw)
+ continue;
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ case AMDGPU_UCODE_ID_RLC_G:
+ ucode_list[ucode_count++] = ucode;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Reinit NBIF block */
+ cmn_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_COMMON);
+ if (unlikely(!cmn_block)) {
+ dev_err(adev->dev, "Failed to get BIF handle\n");
+ return -EINVAL;
+ }
+ r = cmn_block->version->funcs->resume(adev);
+ if (r)
+ return r;
+
+ /* Reinit GFXHUB */
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ /* Reload GFX firmware */
+ r = psp_load_fw_list(&adev->psp, ucode_list, ucode_count);
+ if (r) {
+ dev_err(adev->dev, "GFX ucode load failed after reset\n");
+ return r;
+ }
+
+ /* Resume RLC, FW needs RLC alive to complete reset process */
+ adev->gfx.rlc.funcs->resume(adev);
+
+ /* Wait for FW reset event complete */
+ r = amdgpu_dpm_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to get response from firmware after reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_COMMON))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ (void *)adev);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_ras_set_error_query_ready(adev, true);
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
+ IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = aldebaran_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ /* Update PSP FW topology after reset */
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(reset_context->hive,
+ tmp_adev);
+
+ if (!r) {
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ tmp_adev->asic_reset_res = r;
+ goto end;
+ }
+ }
+ }
+
+end:
+ return r;
+}
+
+static struct amdgpu_reset_handler aldebaran_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = aldebaran_mode2_prepare_hwcontext,
+ .perform_reset = aldebaran_mode2_perform_reset,
+ .restore_hwcontext = aldebaran_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = aldebaran_mode2_reset,
+};
+
+int aldebaran_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = aldebaran_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
+
+ INIT_LIST_HEAD(&reset_ctl->reset_handlers);
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int aldebaran_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.h b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
new file mode 100644
index 000000000..a07db5454
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ALDEBARAN_H__
+#define __ALDEBARAN_H__
+
+#include "amdgpu.h"
+
+int aldebaran_reset_init(struct amdgpu_device *adev);
+int aldebaran_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
new file mode 100644
index 000000000..28e6c9ab8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "aldebaran_ip_offset.h"
+
+int aldebaran_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the block needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
new file mode 100644
index 000000000..a79d53bdb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -0,0 +1,1511 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __AMDGPU_H__
+#define __AMDGPU_H__
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "amdgpu: " fmt
+
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+
+#define dev_fmt(fmt) "amdgpu: " fmt
+
+#include "amdgpu_ctx.h"
+
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/hashtable.h>
+#include <linux/dma-fence.h>
+#include <linux/pci.h>
+
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+
+#include <kgd_kfd_interface.h>
+#include "dm_pp_interface.h"
+#include "kgd_pp_interface.h"
+
+#include "amd_shared.h"
+#include "amdgpu_mode.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_irq.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_ttm.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_gds.h"
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_acp.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_lsdma.h"
+#include "amdgpu_nbio.h"
+#include "amdgpu_hdp.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_csa.h"
+#include "amdgpu_mes_ctx.h"
+#include "amdgpu_gart.h"
+#include "amdgpu_debugfs.h"
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_doorbell.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_discovery.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_mmhub.h"
+#include "amdgpu_gfxhub.h"
+#include "amdgpu_df.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_mca.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
+
+#define MAX_GPU_INSTANCE 64
+
+struct amdgpu_gpu_instance
+{
+ struct amdgpu_device *adev;
+ int mgpu_fan_enabled;
+};
+
+struct amdgpu_mgpu_info
+{
+ struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
+ struct mutex mutex;
+ uint32_t num_gpu;
+ uint32_t num_dgpu;
+ uint32_t num_apu;
+
+ /* delayed reset_func for XGMI configuration if necessary */
+ struct delayed_work delayed_reset_work;
+ bool pending_reset;
+};
+
+enum amdgpu_ss {
+ AMDGPU_SS_DRV_LOAD,
+ AMDGPU_SS_DEV_D0,
+ AMDGPU_SS_DEV_D3,
+ AMDGPU_SS_DRV_UNLOAD
+};
+
+struct amdgpu_watchdog_timer
+{
+ bool timeout_fatal_disable;
+ uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
+};
+
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
+
+/*
+ * Modules parameters.
+ */
+extern int amdgpu_modeset;
+extern unsigned int amdgpu_vram_limit;
+extern int amdgpu_vis_vram_limit;
+extern int amdgpu_gart_size;
+extern int amdgpu_gtt_size;
+extern int amdgpu_moverate;
+extern int amdgpu_audio;
+extern int amdgpu_disp_priority;
+extern int amdgpu_hw_i2c;
+extern int amdgpu_pcie_gen2;
+extern int amdgpu_msi;
+extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
+extern int amdgpu_dpm;
+extern int amdgpu_fw_load_type;
+extern int amdgpu_aspm;
+extern int amdgpu_runtime_pm;
+extern uint amdgpu_ip_block_mask;
+extern int amdgpu_bapm;
+extern int amdgpu_deep_color;
+extern int amdgpu_vm_size;
+extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
+extern int amdgpu_vm_fault_stop;
+extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
+extern int amdgpu_exp_hw_support;
+extern int amdgpu_dc;
+extern int amdgpu_sched_jobs;
+extern int amdgpu_sched_hw_submission;
+extern uint amdgpu_pcie_gen_cap;
+extern uint amdgpu_pcie_lane_cap;
+extern u64 amdgpu_cg_mask;
+extern uint amdgpu_pg_mask;
+extern uint amdgpu_sdma_phase_quantum;
+extern char *amdgpu_disable_cu;
+extern char *amdgpu_virtual_display;
+extern uint amdgpu_pp_feature_mask;
+extern uint amdgpu_force_long_training;
+extern int amdgpu_lbpw;
+extern int amdgpu_compute_multipipe;
+extern int amdgpu_gpu_recovery;
+extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
+extern int amdgpu_smu_pptable_id;
+extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_dc_debug_mask;
+extern uint amdgpu_dc_visual_confirm;
+extern uint amdgpu_dm_abm_level;
+extern int amdgpu_backlight;
+extern struct amdgpu_mgpu_info mgpu_info;
+extern int amdgpu_ras_enable;
+extern uint amdgpu_ras_mask;
+extern int amdgpu_bad_page_threshold;
+extern bool amdgpu_ignore_bad_page_threshold;
+extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
+extern int amdgpu_async_gfx_ring;
+extern int amdgpu_mcbp;
+extern int amdgpu_discovery;
+extern int amdgpu_mes;
+extern int amdgpu_mes_kiq;
+extern int amdgpu_noretry;
+extern int amdgpu_force_asic_type;
+extern int amdgpu_smartshift_bias;
+extern int amdgpu_use_xgmi_p2p;
+extern int amdgpu_mtype_local;
+extern bool enforce_isolation;
+#ifdef CONFIG_HSA_AMD
+extern int sched_policy;
+extern bool debug_evictions;
+extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
+#else
+static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool __maybe_unused debug_evictions; /* = false */
+static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
+#endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
+#endif
+
+extern int amdgpu_tmz;
+extern int amdgpu_reset_method;
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+extern int amdgpu_si_support;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern int amdgpu_cik_support;
+#endif
+extern int amdgpu_num_kcq;
+
+#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+extern int amdgpu_vcnfw_log;
+extern int amdgpu_sg_display;
+
+extern int amdgpu_user_partt_mode;
+
+#define AMDGPU_VM_MAX_NUM_CTX 4096
+#define AMDGPU_SG_THRESHOLD (256*1024*1024)
+#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
+#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
+#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
+#define AMDGPUFB_CONN_LIMIT 4
+#define AMDGPU_BIOS_NUM_SCRATCH 16
+
+#define AMDGPU_VBIOS_VGA_ALLOCATION (9 * 1024 * 1024) /* reserve 8MB for vga emulator and 1 MB for FB */
+
+/* hard reset data */
+#define AMDGPU_ASIC_RESET_DATA 0x39d5e86b
+
+/* reset flags */
+#define AMDGPU_RESET_GFX (1 << 0)
+#define AMDGPU_RESET_COMPUTE (1 << 1)
+#define AMDGPU_RESET_DMA (1 << 2)
+#define AMDGPU_RESET_CP (1 << 3)
+#define AMDGPU_RESET_GRBM (1 << 4)
+#define AMDGPU_RESET_DMA1 (1 << 5)
+#define AMDGPU_RESET_RLC (1 << 6)
+#define AMDGPU_RESET_SEM (1 << 7)
+#define AMDGPU_RESET_IH (1 << 8)
+#define AMDGPU_RESET_VMC (1 << 9)
+#define AMDGPU_RESET_MC (1 << 10)
+#define AMDGPU_RESET_DISPLAY (1 << 11)
+#define AMDGPU_RESET_UVD (1 << 12)
+#define AMDGPU_RESET_VCE (1 << 13)
+#define AMDGPU_RESET_VCE1 (1 << 14)
+
+/* max cursor sizes (in pixels) */
+#define CIK_CURSOR_WIDTH 128
+#define CIK_CURSOR_HEIGHT 128
+
+/* smart shift bias level limits */
+#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
+#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+
+/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */
+#define AMDGPU_SWCTF_EXTRA_DELAY 50
+
+struct amdgpu_xcp_mgr;
+struct amdgpu_device;
+struct amdgpu_irq_src;
+struct amdgpu_fpriv;
+struct amdgpu_bo_va_mapping;
+struct kfd_vm_fault_info;
+struct amdgpu_hive_info;
+struct amdgpu_reset_context;
+struct amdgpu_reset_control;
+
+enum amdgpu_cp_irq {
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP,
+
+ AMDGPU_CP_IRQ_LAST
+};
+
+enum amdgpu_thermal_irq {
+ AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
+ AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
+
+ AMDGPU_THERMAL_IRQ_LAST
+};
+
+enum amdgpu_kiq_irq {
+ AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
+ AMDGPU_CP_KIQ_IRQ_LAST
+};
+#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
+#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 1000
+
+int amdgpu_device_ip_set_clockgating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state);
+int amdgpu_device_ip_set_powergating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state);
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags);
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+
+#define AMDGPU_MAX_IP_NUM 16
+
+struct amdgpu_ip_block_status {
+ bool valid;
+ bool sw;
+ bool hw;
+ bool late_initialized;
+ bool hang;
+};
+
+struct amdgpu_ip_block_version {
+ const enum amd_ip_block_type type;
+ const u32 major;
+ const u32 minor;
+ const u32 rev;
+ const struct amd_ip_funcs *funcs;
+};
+
+#define HW_REV(_Major, _Minor, _Rev) \
+ ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
+
+struct amdgpu_ip_block {
+ struct amdgpu_ip_block_status status;
+ const struct amdgpu_ip_block_version *version;
+};
+
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type,
+ u32 major, u32 minor);
+
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type);
+
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version);
+
+/*
+ * BIOS.
+ */
+bool amdgpu_get_bios(struct amdgpu_device *adev);
+bool amdgpu_read_bios(struct amdgpu_device *adev);
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+/*
+ * Clocks
+ */
+
+#define AMDGPU_MAX_PPLL 3
+
+struct amdgpu_clock {
+ struct amdgpu_pll ppll[AMDGPU_MAX_PPLL];
+ struct amdgpu_pll spll;
+ struct amdgpu_pll mpll;
+ /* 10 Khz units */
+ uint32_t default_mclk;
+ uint32_t default_sclk;
+ uint32_t default_dispclk;
+ uint32_t current_dispclk;
+ uint32_t dp_extclk;
+ uint32_t max_pixel_clock;
+};
+
+/* sub-allocation manager, it has to be protected by another lock.
+ * By conception this is an helper for other part of the driver
+ * like the indirect buffer or semaphore, which both have their
+ * locking.
+ *
+ * Principe is simple, we keep a list of sub allocation in offset
+ * order (first entry has offset == 0, last entry has the highest
+ * offset).
+ *
+ * When allocating new object we first check if there is room at
+ * the end total_size - (last_object_offset + last_object_size) >=
+ * alloc_size. If so we allocate new object there.
+ *
+ * When there is not enough room at the end, we start waiting for
+ * each sub object until we reach object_offset+object_size >=
+ * alloc_size, this object then become the sub object we return.
+ *
+ * Alignment can't be bigger than page size.
+ *
+ * Hole are not considered for allocation to keep things simple.
+ * Assumption is that there won't be hole (all object on same
+ * alignment).
+ */
+
+struct amdgpu_sa_manager {
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
+};
+
+int amdgpu_fence_slab_init(void);
+void amdgpu_fence_slab_fini(void);
+
+/*
+ * IRQS.
+ */
+
+struct amdgpu_flip_work {
+ struct delayed_work flip_work;
+ struct work_struct unpin_work;
+ struct amdgpu_device *adev;
+ int crtc_id;
+ u32 target_vblank;
+ uint64_t base;
+ struct drm_pending_vblank_event *event;
+ struct amdgpu_bo *old_abo;
+ unsigned shared_count;
+ struct dma_fence **shared;
+ struct dma_fence_cb cb;
+ bool async;
+};
+
+
+/*
+ * file private structure
+ */
+
+struct amdgpu_fpriv {
+ struct amdgpu_vm vm;
+ struct amdgpu_bo_va *prt_va;
+ struct amdgpu_bo_va *csa_va;
+ struct mutex bo_list_lock;
+ struct idr bo_list_handles;
+ struct amdgpu_ctx_mgr ctx_mgr;
+ /** GPU partition selection */
+ uint32_t xcp_id;
+};
+
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
+/*
+ * Writeback
+ */
+#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+
+struct amdgpu_wb {
+ struct amdgpu_bo *wb_obj;
+ volatile uint32_t *wb;
+ uint64_t gpu_addr;
+ u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
+ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+};
+
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
+
+/*
+ * Benchmarking
+ */
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
+
+/*
+ * ASIC specific register table accessible by UMD
+ */
+struct amdgpu_allowed_register_entry {
+ uint32_t reg_offset;
+ bool grbm_indexed;
+};
+
+enum amd_reset_method {
+ AMD_RESET_METHOD_NONE = -1,
+ AMD_RESET_METHOD_LEGACY = 0,
+ AMD_RESET_METHOD_MODE0,
+ AMD_RESET_METHOD_MODE1,
+ AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_BACO,
+ AMD_RESET_METHOD_PCI,
+};
+
+struct amdgpu_video_codec_info {
+ u32 codec_type;
+ u32 max_width;
+ u32 max_height;
+ u32 max_pixels_per_frame;
+ u32 max_level;
+};
+
+#define codec_info_build(type, width, height, level) \
+ .codec_type = type,\
+ .max_width = width,\
+ .max_height = height,\
+ .max_pixels_per_frame = height * width,\
+ .max_level = level,
+
+struct amdgpu_video_codecs {
+ const u32 codec_count;
+ const struct amdgpu_video_codec_info *codec_array;
+};
+
+/*
+ * ASIC specific functions.
+ */
+struct amdgpu_asic_funcs {
+ bool (*read_disabled_bios)(struct amdgpu_device *adev);
+ bool (*read_bios_from_rom)(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+ int (*read_register)(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value);
+ void (*set_vga_state)(struct amdgpu_device *adev, bool state);
+ int (*reset)(struct amdgpu_device *adev);
+ enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
+ /* get the reference clock */
+ u32 (*get_xclk)(struct amdgpu_device *adev);
+ /* MM block clocks */
+ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
+ int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+ /* static power management */
+ int (*get_pcie_lanes)(struct amdgpu_device *adev);
+ void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
+ /* get config memsize register */
+ u32 (*get_config_memsize)(struct amdgpu_device *adev);
+ /* flush hdp write queue */
+ void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ /* invalidate hdp read cache */
+ void (*invalidate_hdp)(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+ /* check if the asic needs a full reset of if soft reset will work */
+ bool (*need_full_reset)(struct amdgpu_device *adev);
+ /* initialize doorbell layout for specific asic*/
+ void (*init_doorbell_index)(struct amdgpu_device *adev);
+ /* PCIe bandwidth usage */
+ void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1);
+ /* do we need to reset the asic at init time (e.g., kexec) */
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
+ /* PCIe replay counter */
+ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ /* device supports BACO */
+ bool (*supports_baco)(struct amdgpu_device *adev);
+ /* pre asic_init quirks */
+ void (*pre_asic_init)(struct amdgpu_device *adev);
+ /* enter/exit umd stable pstate */
+ int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
+ /* query video codecs */
+ int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs);
+ /* encode "> 32bits" smn addressing */
+ u64 (*encode_ext_smn_addressing)(int ext_id);
+};
+
+/*
+ * IOCTL.
+ */
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+/* VRAM scratch page for HDP bug, default vram page */
+struct amdgpu_mem_scratch {
+ struct amdgpu_bo *robj;
+ volatile uint32_t *ptr;
+ u64 gpu_addr;
+};
+
+/*
+ * CGS
+ */
+struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
+
+/*
+ * Core structure, functions and helpers.
+ */
+typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
+
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+
+typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
+
+struct amdgpu_mmio_remap {
+ u32 reg_offset;
+ resource_size_t bus_addr;
+};
+
+/* Define the HW IP blocks will be used in driver , add more if necessary */
+enum amd_hw_ip_block_type {
+ GC_HWIP = 1,
+ HDP_HWIP,
+ SDMA0_HWIP,
+ SDMA1_HWIP,
+ SDMA2_HWIP,
+ SDMA3_HWIP,
+ SDMA4_HWIP,
+ SDMA5_HWIP,
+ SDMA6_HWIP,
+ SDMA7_HWIP,
+ LSDMA_HWIP,
+ MMHUB_HWIP,
+ ATHUB_HWIP,
+ NBIO_HWIP,
+ MP0_HWIP,
+ MP1_HWIP,
+ UVD_HWIP,
+ VCN_HWIP = UVD_HWIP,
+ JPEG_HWIP = VCN_HWIP,
+ VCN1_HWIP,
+ VCE_HWIP,
+ DF_HWIP,
+ DCE_HWIP,
+ OSSSYS_HWIP,
+ SMUIO_HWIP,
+ PWR_HWIP,
+ NBIF_HWIP,
+ THM_HWIP,
+ CLK_HWIP,
+ UMC_HWIP,
+ RSMU_HWIP,
+ XGMI_HWIP,
+ DCI_HWIP,
+ PCIE_HWIP,
+ MAX_HWIP
+};
+
+#define HWIP_MAX_INSTANCE 44
+
+#define HW_ID_MAX 300
+#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
+#define IP_VERSION_MAJ(ver) ((ver) >> 16)
+#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_REV(ver) ((ver) & 0xFF)
+
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
+struct amd_powerplay {
+ void *pp_handle;
+ const struct amd_pm_funcs *pp_funcs;
+};
+
+struct ip_discovery_top;
+
+/* polaris10 kickers */
+#define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \
+ ((rid == 0xE3) || \
+ (rid == 0xE4) || \
+ (rid == 0xE5) || \
+ (rid == 0xE7) || \
+ (rid == 0xEF))) || \
+ ((did == 0x6FDF) && \
+ ((rid == 0xE7) || \
+ (rid == 0xEF) || \
+ (rid == 0xFF))))
+
+#define ASICID_IS_P30(did, rid) ((did == 0x67DF) && \
+ ((rid == 0xE1) || \
+ (rid == 0xF7)))
+
+/* polaris11 kickers */
+#define ASICID_IS_P21(did, rid) (((did == 0x67EF) && \
+ ((rid == 0xE0) || \
+ (rid == 0xE5))) || \
+ ((did == 0x67FF) && \
+ ((rid == 0xCF) || \
+ (rid == 0xEF) || \
+ (rid == 0xFF))))
+
+#define ASICID_IS_P31(did, rid) ((did == 0x67EF) && \
+ ((rid == 0xE2)))
+
+/* polaris12 kickers */
+#define ASICID_IS_P23(did, rid) (((did == 0x6987) && \
+ ((rid == 0xC0) || \
+ (rid == 0xC1) || \
+ (rid == 0xC3) || \
+ (rid == 0xC7))) || \
+ ((did == 0x6981) && \
+ ((rid == 0x00) || \
+ (rid == 0x01) || \
+ (rid == 0x10))))
+
+struct amdgpu_mqd_prop {
+ uint64_t mqd_gpu_addr;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint32_t queue_size;
+ bool use_doorbell;
+ uint32_t doorbell_index;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool hqd_active;
+};
+
+struct amdgpu_mqd {
+ unsigned mqd_size;
+ int (*init_mqd)(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *p);
+};
+
+#define AMDGPU_RESET_MAGIC_NUM 64
+#define AMDGPU_MAX_DF_PERFMONS 4
+#define AMDGPU_PRODUCT_NAME_LEN 64
+struct amdgpu_reset_domain;
+
+/*
+ * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
+ */
+#define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size)
+
+struct amdgpu_device {
+ struct device *dev;
+ struct pci_dev *pdev;
+ struct drm_device ddev;
+
+#ifdef CONFIG_DRM_AMD_ACP
+ struct amdgpu_acp acp;
+#endif
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ /* ASIC */
+ enum amd_asic_type asic_type;
+ uint32_t family;
+ uint32_t rev_id;
+ uint32_t external_rev_id;
+ unsigned long flags;
+ unsigned long apu_flags;
+ int usec_timeout;
+ const struct amdgpu_asic_funcs *asic_funcs;
+ bool shutdown;
+ bool need_swiotlb;
+ bool accel_working;
+ struct notifier_block acpi_nb;
+ struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
+ struct debugfs_blob_wrapper debugfs_discovery_blob;
+ struct mutex srbm_mutex;
+ /* GRBM index mutex. Protects concurrent access to GRBM index */
+ struct mutex grbm_idx_mutex;
+ struct dev_pm_domain vga_pm_domain;
+ bool have_disp_power_ref;
+ bool have_atomics_support;
+
+ /* BIOS */
+ bool is_atom_fw;
+ uint8_t *bios;
+ uint32_t bios_size;
+ uint32_t bios_scratch_reg_offset;
+ uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
+
+ /* Register/doorbell mmio */
+ resource_size_t rmmio_base;
+ resource_size_t rmmio_size;
+ void __iomem *rmmio;
+ /* protects concurrent MM_INDEX/DATA based register access */
+ spinlock_t mmio_idx_lock;
+ struct amdgpu_mmio_remap rmmio_remap;
+ /* protects concurrent SMC based register access */
+ spinlock_t smc_idx_lock;
+ amdgpu_rreg_t smc_rreg;
+ amdgpu_wreg_t smc_wreg;
+ /* protects concurrent PCIE register access */
+ spinlock_t pcie_idx_lock;
+ amdgpu_rreg_t pcie_rreg;
+ amdgpu_wreg_t pcie_wreg;
+ amdgpu_rreg_t pciep_rreg;
+ amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg_ext_t pcie_rreg_ext;
+ amdgpu_wreg_ext_t pcie_wreg_ext;
+ amdgpu_rreg64_t pcie_rreg64;
+ amdgpu_wreg64_t pcie_wreg64;
+ /* protects concurrent UVD register access */
+ spinlock_t uvd_ctx_idx_lock;
+ amdgpu_rreg_t uvd_ctx_rreg;
+ amdgpu_wreg_t uvd_ctx_wreg;
+ /* protects concurrent DIDT register access */
+ spinlock_t didt_idx_lock;
+ amdgpu_rreg_t didt_rreg;
+ amdgpu_wreg_t didt_wreg;
+ /* protects concurrent gc_cac register access */
+ spinlock_t gc_cac_idx_lock;
+ amdgpu_rreg_t gc_cac_rreg;
+ amdgpu_wreg_t gc_cac_wreg;
+ /* protects concurrent se_cac register access */
+ spinlock_t se_cac_idx_lock;
+ amdgpu_rreg_t se_cac_rreg;
+ amdgpu_wreg_t se_cac_wreg;
+ /* protects concurrent ENDPOINT (audio) register access */
+ spinlock_t audio_endpt_idx_lock;
+ amdgpu_block_rreg_t audio_endpt_rreg;
+ amdgpu_block_wreg_t audio_endpt_wreg;
+ struct amdgpu_doorbell doorbell;
+
+ /* clock/pll info */
+ struct amdgpu_clock clock;
+
+ /* MC */
+ struct amdgpu_gmc gmc;
+ struct amdgpu_gart gart;
+ dma_addr_t dummy_page_addr;
+ struct amdgpu_vm_manager vm_manager;
+ struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
+ DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
+
+ /* memory management */
+ struct amdgpu_mman mman;
+ struct amdgpu_mem_scratch mem_scratch;
+ struct amdgpu_wb wb;
+ atomic64_t num_bytes_moved;
+ atomic64_t num_evictions;
+ atomic64_t num_vram_cpu_page_faults;
+ atomic_t gpu_reset_counter;
+ atomic_t vram_lost_counter;
+
+ /* data for buffer migration throttling */
+ struct {
+ spinlock_t lock;
+ s64 last_update_us;
+ s64 accum_us; /* accumulated microseconds */
+ s64 accum_us_vis; /* for visible VRAM */
+ u32 log2_max_MBps;
+ } mm_stats;
+
+ /* display */
+ bool enable_virtual_display;
+ struct amdgpu_vkms_output *amdgpu_vkms_output;
+ struct amdgpu_mode_info mode_info;
+ /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
+ struct delayed_work hotplug_work;
+ struct amdgpu_irq_src crtc_irq;
+ struct amdgpu_irq_src vline0_irq;
+ struct amdgpu_irq_src vupdate_irq;
+ struct amdgpu_irq_src pageflip_irq;
+ struct amdgpu_irq_src hpd_irq;
+ struct amdgpu_irq_src dmub_trace_irq;
+ struct amdgpu_irq_src dmub_outbox_irq;
+
+ /* rings */
+ u64 fence_context;
+ unsigned num_rings;
+ struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct dma_fence __rcu *gang_submit;
+ bool ib_pool_ready;
+ struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+
+ /* interrupts */
+ struct amdgpu_irq irq;
+
+ /* powerplay */
+ struct amd_powerplay powerplay;
+ struct amdgpu_pm pm;
+ u64 cg_flags;
+ u32 pg_flags;
+
+ /* nbio */
+ struct amdgpu_nbio nbio;
+
+ /* hdp */
+ struct amdgpu_hdp hdp;
+
+ /* smuio */
+ struct amdgpu_smuio smuio;
+
+ /* mmhub */
+ struct amdgpu_mmhub mmhub;
+
+ /* gfxhub */
+ struct amdgpu_gfxhub gfxhub;
+
+ /* gfx */
+ struct amdgpu_gfx gfx;
+
+ /* sdma */
+ struct amdgpu_sdma sdma;
+
+ /* lsdma */
+ struct amdgpu_lsdma lsdma;
+
+ /* uvd */
+ struct amdgpu_uvd uvd;
+
+ /* vce */
+ struct amdgpu_vce vce;
+
+ /* vcn */
+ struct amdgpu_vcn vcn;
+
+ /* jpeg */
+ struct amdgpu_jpeg jpeg;
+
+ /* firmwares */
+ struct amdgpu_firmware firmware;
+
+ /* PSP */
+ struct psp_context psp;
+
+ /* GDS */
+ struct amdgpu_gds gds;
+
+ /* KFD */
+ struct amdgpu_kfd_dev kfd;
+
+ /* UMC */
+ struct amdgpu_umc umc;
+
+ /* display related functionality */
+ struct amdgpu_display_manager dm;
+
+ /* mes */
+ bool enable_mes;
+ bool enable_mes_kiq;
+ struct amdgpu_mes mes;
+ struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+
+ /* df */
+ struct amdgpu_df df;
+
+ /* MCA */
+ struct amdgpu_mca mca;
+
+ struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
+ uint32_t harvest_ip_mask;
+ int num_ip_blocks;
+ struct mutex mn_lock;
+ DECLARE_HASHTABLE(mn_hash, 7);
+
+ /* tracking pinned memory */
+ atomic64_t vram_pin_size;
+ atomic64_t visible_pin_size;
+ atomic64_t gart_pin_size;
+
+ /* soc15 register offset based on ip, instance and segment */
+ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ struct amdgpu_ip_map_info ip_map;
+
+ /* delayed work_func for deferring clockgating during resume */
+ struct delayed_work delayed_init_work;
+
+ struct amdgpu_virt virt;
+
+ /* link all shadow bo */
+ struct list_head shadow_list;
+ struct mutex shadow_list_lock;
+
+ /* record hw reset is performed */
+ bool has_hw_reset;
+ u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
+
+ /* s3/s4 mask */
+ bool in_suspend;
+ bool in_s3;
+ bool in_s4;
+ bool in_s0ix;
+
+ enum pp_mp1_state mp1_state;
+ struct amdgpu_doorbell_index doorbell_index;
+
+ struct mutex notifier_lock;
+
+ int asic_reset_res;
+ struct work_struct xgmi_reset_work;
+ struct list_head reset_list;
+
+ long gfx_timeout;
+ long sdma_timeout;
+ long video_timeout;
+ long compute_timeout;
+
+ uint64_t unique_id;
+ uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* enable runtime pm on the device */
+ bool in_runpm;
+ bool has_pr3;
+
+ bool ucode_sysfs_en;
+
+ /* Chip product information */
+ char product_number[20];
+ char product_name[AMDGPU_PRODUCT_NAME_LEN];
+ char serial[20];
+
+ atomic_t throttling_logging_enabled;
+ struct ratelimit_state throttling_logging_rs;
+ uint32_t ras_hw_enabled;
+ uint32_t ras_enabled;
+
+ bool no_hw_access;
+ struct pci_saved_state *pci_state;
+ pci_channel_state_t pci_channel_state;
+
+ /* Track auto wait count on s_barrier settings */
+ bool barrier_has_auto_waitcnt;
+
+ struct amdgpu_reset_control *reset_cntl;
+ uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+ bool ram_is_direct_mapped;
+
+ struct list_head ras_list;
+
+ struct ip_discovery_top *ip_top;
+
+ struct amdgpu_reset_domain *reset_domain;
+
+ struct mutex benchmark_mutex;
+
+ /* reset dump register */
+ uint32_t *reset_dump_reg_list;
+ uint32_t *reset_dump_reg_value;
+ int num_regs;
+#ifdef CONFIG_DEV_COREDUMP
+ struct amdgpu_task_info reset_task_info;
+ bool reset_vram_lost;
+ struct timespec64 reset_time;
+#endif
+
+ bool scpm_enabled;
+ uint32_t scpm_status;
+
+ struct work_struct reset_work;
+
+ bool job_hang;
+ bool dc_enabled;
+ /* Mask of active clusters */
+ uint32_t aid_mask;
+};
+
+static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+{
+ return container_of(ddev, struct amdgpu_device, ddev);
+}
+
+static inline struct drm_device *adev_to_drm(struct amdgpu_device *adev)
+{
+ return &adev->ddev;
+}
+
+static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
+{
+ return container_of(bdev, struct amdgpu_device, mman.bdev);
+}
+
+int amdgpu_device_init(struct amdgpu_device *adev,
+ uint32_t flags);
+void amdgpu_device_fini_hw(struct amdgpu_device *adev);
+void amdgpu_device_fini_sw(struct amdgpu_device *adev);
+
+int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
+void amdgpu_device_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data);
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v, uint32_t xcc_id);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
+ u32 reg_addr);
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
+ u32 reg_addr, u32 reg_data);
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
+ u32 reg_addr, u64 reg_data);
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
+
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
+
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context);
+
+int emu_soc_asic_init(struct amdgpu_device *adev);
+
+/*
+ * Registers read & write functions.
+ */
+#define AMDGPU_REGS_NO_KIQ (1<<1)
+#define AMDGPU_REGS_RLC (1<<2)
+
+#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
+#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
+#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0))
+#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
+#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
+#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
+#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
+#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
+#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
+#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
+#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
+#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
+#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
+#define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
+#define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
+#define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
+#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg))
+#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v))
+#define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
+#define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
+#define WREG32_P(reg, val, mask) \
+ do { \
+ uint32_t tmp_ = RREG32(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32(reg, tmp_); \
+ } while (0)
+#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+#define WREG32_PLL_P(reg, val, mask) \
+ do { \
+ uint32_t tmp_ = RREG32_PLL(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32_PLL(reg, tmp_); \
+ } while (0)
+
+#define WREG32_SMC_P(_Reg, _Val, _Mask) \
+ do { \
+ u32 tmp = RREG32_SMC(_Reg); \
+ tmp &= (_Mask); \
+ tmp |= ((_Val) & ~(_Mask)); \
+ WREG32_SMC(_Reg, tmp); \
+ } while (0)
+
+#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
+
+#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
+#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
+
+#define REG_SET_FIELD(orig_val, reg, field, field_val) \
+ (((orig_val) & ~REG_FIELD_MASK(reg, field)) | \
+ (REG_FIELD_MASK(reg, field) & ((field_val) << REG_FIELD_SHIFT(reg, field))))
+
+#define REG_GET_FIELD(value, reg, field) \
+ (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_FIELD(reg, field, val) \
+ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
+ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+/*
+ * BIOS helpers.
+ */
+#define RBIOS8(i) (adev->bios[i])
+#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
+#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
+
+/*
+ * ASICs macro.
+ */
+#define amdgpu_asic_set_vga_state(adev, state) \
+ ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
+#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
+#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
+#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
+#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
+#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
+#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
+#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
+#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
+#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
+#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
+#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
+#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
+#define amdgpu_asic_flush_hdp(adev, r) \
+ ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
+#define amdgpu_asic_invalidate_hdp(adev, r) \
+ ((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
+ ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
+#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
+#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
+#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
+#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
+#define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev))
+#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \
+ ((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
+#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
+
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
+
+#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
+#define for_each_inst(i, inst_mask) \
+ for (i = ffs(inst_mask); i-- != 0; \
+ i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))
+
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
+/* Common functions */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context);
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+int amdgpu_device_pci_reset(struct amdgpu_device *adev);
+bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_device_pcie_dynamic_switching_supported(void);
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
+bool amdgpu_device_aspm_support_quirk(void);
+
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes);
+int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
+ const u32 *registers,
+ const u32 array_size);
+
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct drm_device *dev);
+bool amdgpu_device_supports_px(struct drm_device *dev);
+bool amdgpu_device_supports_boco(struct drm_device *dev);
+bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
+bool amdgpu_device_supports_baco(struct drm_device *dev);
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+int amdgpu_device_baco_enter(struct drm_device *dev);
+int amdgpu_device_baco_exit(struct drm_device *dev);
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+void amdgpu_device_halt(struct amdgpu_device *adev);
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg);
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang);
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+
+/* atpx handler */
+#if defined(CONFIG_VGA_SWITCHEROO)
+void amdgpu_register_atpx_handler(void);
+void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
+bool amdgpu_atpx_dgpu_req_power_for_displays(void);
+bool amdgpu_has_atpx(void);
+#else
+static inline void amdgpu_register_atpx_handler(void) {}
+static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
+static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
+static inline bool amdgpu_has_atpx(void) { return false; }
+#endif
+
+#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void);
+#else
+static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
+#endif
+
+/*
+ * KMS
+ */
+extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
+extern const int amdgpu_max_kms_ioctl;
+
+int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags);
+void amdgpu_driver_unload_kms(struct drm_device *dev);
+void amdgpu_driver_lastclose_kms(struct drm_device *dev);
+int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
+void amdgpu_driver_postclose_kms(struct drm_device *dev,
+ struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
+u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
+int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
+void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
+int amdgpu_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+/*
+ * functions used by amdgpu_encoder.c
+ */
+struct amdgpu_afmt_acr {
+ u32 clock;
+
+ int n_32khz;
+ int cts_32khz;
+
+ int n_44_1khz;
+ int cts_44_1khz;
+
+ int n_48khz;
+ int cts_48khz;
+
+};
+
+struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
+
+/* amdgpu_acpi.c */
+
+struct amdgpu_numa_info {
+ uint64_t size;
+ int pxm;
+ int nid;
+};
+
+/* ATCS Device/Driver State */
+#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
+#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
+#define AMDGPU_ATCS_PSC_DRV_STATE_OPR 0
+#define AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR 1
+
+#if defined(CONFIG_ACPI)
+int amdgpu_acpi_init(struct amdgpu_device *adev);
+void amdgpu_acpi_fini(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_power_shift_control_supported(void);
+int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
+ u8 perf_req, bool advertise);
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state);
+int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
+int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info);
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
+void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
+#else
+static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+ u64 *tmr_offset, u64 *tmr_size)
+{
+ return -EINVAL;
+}
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+ int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ return -EINVAL;
+}
+static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
+static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
+static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state) { return 0; }
+static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
+ enum amdgpu_ss ss_state) { return 0; }
+#endif
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+#else
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+#endif
+
+#if defined(CONFIG_DRM_AMD_DC)
+int amdgpu_dm_display_resume(struct amdgpu_device *adev );
+#else
+static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
+#endif
+
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
+pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state);
+pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev);
+pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev);
+void amdgpu_pci_resume(struct pci_dev *pdev);
+
+bool amdgpu_device_cache_pci_state(struct pci_dev *pdev);
+bool amdgpu_device_load_pci_state(struct pci_dev *pdev);
+
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev);
+
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state);
+
+static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
+{
+ return amdgpu_gpu_recovery != 0 &&
+ adev->gfx_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->compute_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->sdma_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->video_timeout != MAX_SCHEDULE_TIMEOUT;
+}
+
+#include "amdgpu_object.h"
+
+static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
+{
+ return adev->gmc.tmz_enabled;
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 000000000..6d72355ac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/pci.h>
+#include <linux/pm_domain.h>
+#include <linux/platform_device.h>
+#include <sound/designware_i2s.h>
+#include <sound/pcm.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ST_JADEITE 1
+#define ACP_TILE_ON_MASK 0x03
+#define ACP_TILE_OFF_MASK 0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20
+
+#define ACP_TILE_P1_MASK 0x3e
+#define ACP_TILE_P2_MASK 0x3d
+#define ACP_TILE_DSP0_MASK 0x3b
+#define ACP_TILE_DSP1_MASK 0x37
+
+#define ACP_TILE_DSP2_MASK 0x2f
+
+#define ACP_DMA_REGS_END 0x146c0
+#define ACP_I2S_PLAY_REGS_START 0x14840
+#define ACP_I2S_PLAY_REGS_END 0x148b4
+#define ACP_I2S_CAP_REGS_START 0x148b8
+#define ACP_I2S_CAP_REGS_END 0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68
+#define ACP_BT_PLAY_REGS_START 0x14970
+#define ACP_BT_PLAY_REGS_END 0x14a24
+#define ACP_BT_COMP1_REG_OFFSET 0xac
+#define ACP_BT_COMP2_REG_OFFSET 0xa8
+
+#define mmACP_PGFSM_RETAIN_REG 0x51c9
+#define mmACP_PGFSM_CONFIG_REG 0x51ca
+#define mmACP_PGFSM_READ_REG_0 0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb
+
+#define mmACP_CONTROL 0x5131
+#define mmACP_STATUS 0x5133
+#define mmACP_SOFT_RESET 0x5134
+#define ACP_CONTROL__ClkEn_MASK 0x1
+#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100
+#define ACP_SOFT_RESET__SoftResetAudDone_MASK 0x1000000
+#define ACP_CLOCK_EN_TIME_OUT_VALUE 0x000000FF
+#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF
+
+#define ACP_TIMEOUT_LOOP 0x000000FF
+#define ACP_DEVS 4
+#define ACP_SRC_ID 162
+
+static unsigned long acp_machine_id;
+
+enum {
+ ACP_TILE_P1 = 0,
+ ACP_TILE_P2,
+ ACP_TILE_DSP0,
+ ACP_TILE_DSP1,
+ ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->acp.parent = adev->dev;
+
+ adev->acp.cgs_device =
+ amdgpu_cgs_create_device(adev);
+ if (!adev->acp.cgs_device)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int acp_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->acp.cgs_device)
+ amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+ return 0;
+}
+
+struct acp_pm_domain {
+ void *adev;
+ struct generic_pm_domain gpd;
+};
+
+static int acp_poweroff(struct generic_pm_domain *genpd)
+{
+ struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ adev = apd->adev;
+ /* call smu to POWER GATE ACP block
+ * smu will
+ * 1. turn off the acp clock
+ * 2. power off the acp tiles
+ * 3. check and enter ulv state
+ */
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ return 0;
+}
+
+static int acp_poweron(struct generic_pm_domain *genpd)
+{
+ struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ adev = apd->adev;
+ /* call smu to UNGATE ACP block
+ * smu will
+ * 1. exit ulv
+ * 2. turn on acp clock
+ * 3. power on acp tiles
+ */
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ return 0;
+}
+
+static int acp_genpd_add_device(struct device *dev, void *data)
+{
+ struct generic_pm_domain *gpd = data;
+ int ret;
+
+ ret = pm_genpd_add_device(gpd, dev);
+ if (ret)
+ dev_err(dev, "Failed to add dev to genpd %d\n", ret);
+
+ return ret;
+}
+
+static int acp_genpd_remove_device(struct device *dev, void *data)
+{
+ int ret;
+
+ ret = pm_genpd_remove_device(dev);
+ if (ret)
+ dev_err(dev, "Failed to remove dev from genpd %d\n", ret);
+
+ /* Continue to remove */
+ return 0;
+}
+
+static int acp_quirk_cb(const struct dmi_system_id *id)
+{
+ acp_machine_id = ST_JADEITE;
+ return 1;
+}
+
+static const struct dmi_system_id acp_quirk_table[] = {
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
+ }
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
+ },
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
+ },
+ },
+ {}
+};
+
+/**
+ * acp_hw_init - start and test ACP block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ */
+static int acp_hw_init(void *handle)
+{
+ int r;
+ u64 acp_base;
+ u32 val = 0;
+ u32 count = 0;
+ struct i2s_platform_data *i2s_pdata = NULL;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ const struct amdgpu_ip_block *ip_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+
+ if (!ip_block)
+ return -EINVAL;
+
+ r = amd_acp_hw_init(adev->acp.cgs_device,
+ ip_block->version->major, ip_block->version->minor);
+ /* -ENODEV means board uses AZ rather than ACP */
+ if (r == -ENODEV) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ return 0;
+ } else if (r) {
+ return r;
+ }
+
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ acp_base = adev->rmmio_base;
+ adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+ if (!adev->acp.acp_genpd)
+ return -ENOMEM;
+
+ adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+ adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+ adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+ adev->acp.acp_genpd->adev = adev;
+
+ pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+ dmi_check_system(acp_quirk_table);
+ switch (acp_machine_id) {
+ case ST_JADEITE:
+ {
+ adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dma_irq";
+ adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].num_resources = 3;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+ r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
+ if (r)
+ goto failure;
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ break;
+ }
+ default:
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ }
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1 |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ }
+
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
+
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].num_resources = 5;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
+ if (r)
+ goto failure;
+
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ }
+
+ /* Assert Soft reset of ACP */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+ val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+ count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+ (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ r = -ETIMEDOUT;
+ goto failure;
+ }
+ udelay(100);
+ }
+ /* Enable clock to ACP and wait until the clock is enabled */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+ val = val | ACP_CONTROL__ClkEn_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+ count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+ if (val & (u32) 0x1)
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ r = -ETIMEDOUT;
+ goto failure;
+ }
+ udelay(100);
+ }
+ /* Deassert the SOFT RESET flags */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+ return 0;
+
+failure:
+ kfree(i2s_pdata);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_cell);
+ kfree(adev->acp.acp_genpd);
+ return r;
+}
+
+/**
+ * acp_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ */
+static int acp_hw_fini(void *handle)
+{
+ u32 val = 0;
+ u32 count = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* return early if no ACP */
+ if (!adev->acp.acp_genpd) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ return 0;
+ }
+
+ /* Assert Soft reset of ACP */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+ val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+ count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+ (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+ /* Disable ACP clock */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+ val &= ~ACP_CONTROL__ClkEn_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+ count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+ if (val & (u32) 0x1)
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+
+ device_for_each_child(adev->acp.parent, NULL,
+ acp_genpd_remove_device);
+
+ mfd_remove_devices(adev->acp.parent);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_genpd);
+ kfree(adev->acp.acp_cell);
+
+ return 0;
+}
+
+static int acp_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* power up on suspend */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ return 0;
+}
+
+static int acp_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* power down again on resume */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ return 0;
+}
+
+static int acp_early_init(void *handle)
+{
+ return 0;
+}
+
+static bool acp_is_idle(void *handle)
+{
+ return true;
+}
+
+static int acp_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int acp_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int acp_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int acp_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+
+ return 0;
+}
+
+static const struct amd_ip_funcs acp_ip_funcs = {
+ .name = "acp_ip",
+ .early_init = acp_early_init,
+ .late_init = NULL,
+ .sw_init = acp_sw_init,
+ .sw_fini = acp_sw_fini,
+ .hw_init = acp_hw_init,
+ .hw_fini = acp_hw_fini,
+ .suspend = acp_suspend,
+ .resume = acp_resume,
+ .is_idle = acp_is_idle,
+ .wait_for_idle = acp_wait_for_idle,
+ .soft_reset = acp_soft_reset,
+ .set_clockgating_state = acp_set_clockgating_state,
+ .set_powergating_state = acp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version acp_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ACP,
+ .major = 2,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &acp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
new file mode 100644
index 000000000..a288ce25c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_ACP_H__
+#define __AMDGPU_ACP_H__
+
+#include <linux/mfd/core.h>
+
+struct amdgpu_acp {
+ struct device *parent;
+ struct cgs_device *cgs_device;
+ struct amd_acp_private *private;
+ struct mfd_cell *acp_cell;
+ struct resource *acp_res;
+ struct acp_pm_domain *acp_genpd;
+};
+
+extern const struct amdgpu_ip_block_version acp_ip_block;
+
+#endif /* __AMDGPU_ACP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
new file mode 100644
index 000000000..2bca37044
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -0,0 +1,1518 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/power_supply.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <acpi/video.h>
+#include <acpi/actbl.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_display.h"
+#include "amd_acpi.h"
+#include "atom.h"
+
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+struct xarray numa_info_xa;
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+ struct list_head list;
+ struct amdgpu_numa_info *numa_info;
+ uint8_t xcp_node;
+ uint8_t phy_id;
+ acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+ struct list_head list;
+ struct list_head xcc_list;
+ uint16_t bdf;
+ uint16_t supp_xcp_mode;
+ uint16_t xcp_mode;
+ uint16_t mem_mode;
+ uint64_t tmr_base;
+ uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
+struct amdgpu_atif_notification_cfg {
+ bool enabled;
+ int command_code;
+};
+
+struct amdgpu_atif_notifications {
+ bool thermal_state;
+ bool forced_power_state;
+ bool system_power_state;
+ bool brightness_change;
+ bool dgpu_display_event;
+ bool gpu_package_power_limit;
+};
+
+struct amdgpu_atif_functions {
+ bool system_params;
+ bool sbios_requests;
+ bool temperature_change;
+ bool query_backlight_transfer_characteristics;
+ bool ready_to_undock;
+ bool external_gpu_information;
+};
+
+struct amdgpu_atif {
+ acpi_handle handle;
+
+ struct amdgpu_atif_notifications notifications;
+ struct amdgpu_atif_functions functions;
+ struct amdgpu_atif_notification_cfg notification_cfg;
+ struct backlight_device *bd;
+ struct amdgpu_dm_backlight_caps backlight_caps;
+};
+
+struct amdgpu_atcs_functions {
+ bool get_ext_state;
+ bool pcie_perf_req;
+ bool pcie_dev_rdy;
+ bool pcie_bus_width;
+ bool power_shift_control;
+};
+
+struct amdgpu_atcs {
+ acpi_handle handle;
+
+ struct amdgpu_atcs_functions functions;
+};
+
+static struct amdgpu_acpi_priv {
+ struct amdgpu_atif atif;
+ struct amdgpu_atcs atcs;
+} amdgpu_acpi_priv;
+
+/* Call the ATIF method
+ */
+/**
+ * amdgpu_atif_call - call an ATIF method
+ *
+ * @atif: atif structure
+ * @function: the ATIF function to execute
+ * @params: ATIF function params
+ *
+ * Executes the requested ATIF function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
+ int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object atif_arg_elements[2];
+ struct acpi_object_list atif_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atif_arg.count = 2;
+ atif_arg.pointer = &atif_arg_elements[0];
+
+ atif_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atif_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atif_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atif_arg_elements[1].buffer.length = params->length;
+ atif_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atif_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atif_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
+ &buffer);
+
+ /* Fail only if calling the method fails and ATIF is supported */
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
+ acpi_format_exception(status));
+ kfree(buffer.pointer);
+ return NULL;
+ }
+
+ return buffer.pointer;
+}
+
+/**
+ * amdgpu_atif_parse_notification - parse supported notifications
+ *
+ * @n: supported notifications struct
+ * @mask: supported notifications mask from ATIF
+ *
+ * Use the supported notifications mask from ATIF function
+ * ATIF_FUNCTION_VERIFY_INTERFACE to determine what notifications
+ * are supported (all asics).
+ */
+static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
+{
+ n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
+ n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
+ n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
+ n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
+ n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
+ n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
+}
+
+/**
+ * amdgpu_atif_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATIF
+ *
+ * Use the supported functions mask from ATIF function
+ * ATIF_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mask)
+{
+ f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
+ f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
+ f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
+ f->query_backlight_transfer_characteristics =
+ mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
+ f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
+ f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
+}
+
+/**
+ * amdgpu_atif_verify_interface - verify ATIF
+ *
+ * @atif: amdgpu atif struct
+ *
+ * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
+ * to initialize ATIF and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct atif_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 12) {
+ DRM_INFO("ATIF buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ DRM_DEBUG_DRIVER("ATIF version %u\n", output.version);
+
+ amdgpu_atif_parse_notification(&atif->notifications, output.notification_mask);
+ amdgpu_atif_parse_functions(&atif->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_get_notification_params - determine notify configuration
+ *
+ * @atif: acpi handle
+ *
+ * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
+ * to determine if a notifier is used and if so which one
+ * (all asics). This is either Notify(VGA, 0x81) or Notify(VGA, n)
+ * where n is specified in the result if a notifier is used.
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg;
+ struct atif_system_params params;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS,
+ NULL);
+ if (!info) {
+ err = -EIO;
+ goto out;
+ }
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memset(&params, 0, sizeof(params));
+ size = min(sizeof(params), size);
+ memcpy(&params, info->buffer.pointer, size);
+
+ DRM_DEBUG_DRIVER("SYSTEM_PARAMS: mask = %#x, flags = %#x\n",
+ params.flags, params.valid_mask);
+ params.flags = params.flags & params.valid_mask;
+
+ if ((params.flags & ATIF_NOTIFY_MASK) == ATIF_NOTIFY_NONE) {
+ n->enabled = false;
+ n->command_code = 0;
+ } else if ((params.flags & ATIF_NOTIFY_MASK) == ATIF_NOTIFY_81) {
+ n->enabled = true;
+ n->command_code = 0x81;
+ } else {
+ if (size < 11) {
+ err = -EINVAL;
+ goto out;
+ }
+ n->enabled = true;
+ n->command_code = params.command_code;
+ }
+
+out:
+ DRM_DEBUG_DRIVER("Notification %s, command code = %#x\n",
+ (n->enabled ? "enabled" : "disabled"),
+ n->command_code);
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
+ *
+ * @atif: acpi handle
+ *
+ * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
+ * to determine the acceptable range of backlight values
+ *
+ * Backlight_caps.caps_valid will be set to true if the query is successful
+ *
+ * The input signals are in range 0-255
+ *
+ * This function assumes the display with backlight is the first LCD
+ *
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct atif_qbtc_output characteristics;
+ struct atif_qbtc_arguments arguments;
+ struct acpi_buffer params;
+ size_t size;
+ int err = 0;
+
+ arguments.size = sizeof(arguments);
+ arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;
+
+ params.length = sizeof(arguments);
+ params.pointer = (void *)&arguments;
+
+ info = amdgpu_atif_call(atif,
+ ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
+ &params);
+ if (!info) {
+ err = -EIO;
+ goto out;
+ }
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memset(&characteristics, 0, sizeof(characteristics));
+ size = min(sizeof(characteristics), size);
+ memcpy(&characteristics, info->buffer.pointer, size);
+
+ atif->backlight_caps.caps_valid = true;
+ atif->backlight_caps.min_input_signal =
+ characteristics.min_input_signal;
+ atif->backlight_caps.max_input_signal =
+ characteristics.max_input_signal;
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_get_sbios_requests - get requested sbios event
+ *
+ * @atif: acpi handle
+ * @req: atif sbios request struct
+ *
+ * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
+ * to determine what requests the sbios is making to the driver
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif,
+ struct atif_sbios_requests *req)
+{
+ union acpi_object *info;
+ size_t size;
+ int count = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS,
+ NULL);
+ if (!info)
+ return -EIO;
+
+ size = *(u16 *)info->buffer.pointer;
+ if (size < 0xd) {
+ count = -EINVAL;
+ goto out;
+ }
+ memset(req, 0, sizeof(*req));
+
+ size = min(sizeof(*req), size);
+ memcpy(req, info->buffer.pointer, size);
+ DRM_DEBUG_DRIVER("SBIOS pending requests: %#x\n", req->pending);
+
+ count = hweight32(req->pending);
+
+out:
+ kfree(info);
+ return count;
+}
+
+/**
+ * amdgpu_atif_handler - handle ATIF notify requests
+ *
+ * @adev: amdgpu_device pointer
+ * @event: atif sbios request struct
+ *
+ * Checks the acpi event and if it matches an atif event,
+ * handles it.
+ *
+ * Returns:
+ * NOTIFY_BAD or NOTIFY_DONE, depending on the event.
+ */
+static int amdgpu_atif_handler(struct amdgpu_device *adev,
+ struct acpi_bus_event *event)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+ int count;
+
+ DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
+ event->device_class, event->type);
+
+ if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
+ return NOTIFY_DONE;
+
+ /* Is this actually our event? */
+ if (!atif->notification_cfg.enabled ||
+ event->type != atif->notification_cfg.command_code) {
+ /* These events will generate keypresses otherwise */
+ if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
+ return NOTIFY_BAD;
+ else
+ return NOTIFY_DONE;
+ }
+
+ if (atif->functions.sbios_requests) {
+ struct atif_sbios_requests req;
+
+ /* Check pending SBIOS requests */
+ count = amdgpu_atif_get_sbios_requests(atif, &req);
+
+ if (count <= 0)
+ return NOTIFY_BAD;
+
+ DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
+
+ if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
+ if (atif->bd) {
+ DRM_DEBUG_DRIVER("Changing brightness to %d\n",
+ req.backlight_level);
+ /*
+ * XXX backlight_device_set_brightness() is
+ * hardwired to post BACKLIGHT_UPDATE_SYSFS.
+ * It probably should accept 'reason' parameter.
+ */
+ backlight_device_set_brightness(atif->bd, req.backlight_level);
+ }
+ }
+
+ if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
+ if (adev->flags & AMD_IS_PX) {
+ pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(adev_to_drm(adev));
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ }
+ }
+ /* TODO: check other events */
+ }
+
+ /* We've handled the event, stop the notifier chain. The ACPI interface
+ * overloads ACPI_VIDEO_NOTIFY_PROBE, we don't want to send that to
+ * userspace if the event was generated only to signal a SBIOS
+ * request.
+ */
+ return NOTIFY_BAD;
+}
+
+/* Call the ATCS method
+ */
+/**
+ * amdgpu_atcs_call - call an ATCS method
+ *
+ * @atcs: atcs structure
+ * @function: the ATCS function to execute
+ * @params: ATCS function params
+ *
+ * Executes the requested ATCS function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
+ int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object atcs_arg_elements[2];
+ struct acpi_object_list atcs_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atcs_arg.count = 2;
+ atcs_arg.pointer = &atcs_arg_elements[0];
+
+ atcs_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atcs_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atcs_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atcs_arg_elements[1].buffer.length = params->length;
+ atcs_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atcs_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atcs_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer);
+
+ /* Fail only if calling the method fails and ATIF is supported */
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ DRM_DEBUG_DRIVER("failed to evaluate ATCS got %s\n",
+ acpi_format_exception(status));
+ kfree(buffer.pointer);
+ return NULL;
+ }
+
+ return buffer.pointer;
+}
+
+/**
+ * amdgpu_atcs_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATCS
+ *
+ * Use the supported functions mask from ATCS function
+ * ATCS_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mask)
+{
+ f->get_ext_state = mask & ATCS_GET_EXTERNAL_STATE_SUPPORTED;
+ f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED;
+ f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED;
+ f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED;
+ f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED;
+}
+
+/**
+ * amdgpu_atcs_verify_interface - verify ATCS
+ *
+ * @atcs: amdgpu atcs struct
+ *
+ * Execute the ATCS_FUNCTION_VERIFY_INTERFACE ATCS function
+ * to initialize ATCS and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
+{
+ union acpi_object *info;
+ struct atcs_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 8) {
+ DRM_INFO("ATCS buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ DRM_DEBUG_DRIVER("ATCS version %u\n", output.version);
+
+ amdgpu_atcs_parse_functions(&atcs->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_acpi_is_pcie_performance_request_supported
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check if the ATCS pcie_perf_req and pcie_dev_rdy methods
+ * are supported (all asics).
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+
+ if (atcs->functions.pcie_perf_req && atcs->functions.pcie_dev_rdy)
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_acpi_is_power_shift_control_supported
+ *
+ * Check if the ATCS power shift control method
+ * is supported.
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_power_shift_control_supported(void)
+{
+ return amdgpu_acpi_priv.atcs.functions.power_shift_control;
+}
+
+/**
+ * amdgpu_acpi_pcie_notify_device_ready
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Executes the PCIE_DEVICE_READY_NOTIFICATION method
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+
+ if (!atcs->functions.pcie_dev_rdy)
+ return -EINVAL;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
+ if (!info)
+ return -EIO;
+
+ kfree(info);
+
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_pcie_performance_request
+ *
+ * @adev: amdgpu_device pointer
+ * @perf_req: requested perf level (pcie gen speed)
+ * @advertise: set advertise caps flag if set
+ *
+ * Executes the PCIE_PERFORMANCE_REQUEST method to
+ * change the pcie gen speed (all asics).
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
+ u8 perf_req, bool advertise)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct atcs_pref_req_input atcs_input;
+ struct atcs_pref_req_output atcs_output;
+ struct acpi_buffer params;
+ size_t size;
+ u32 retry = 3;
+
+ if (amdgpu_acpi_pcie_notify_device_ready(adev))
+ return -EINVAL;
+
+ if (!atcs->functions.pcie_perf_req)
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_pref_req_input);
+ /* client id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+ atcs_input.client_id = pci_dev_id(adev->pdev);
+ atcs_input.valid_flags_mask = ATCS_VALID_FLAGS_MASK;
+ atcs_input.flags = ATCS_WAIT_FOR_COMPLETION;
+ if (advertise)
+ atcs_input.flags |= ATCS_ADVERTISE_CAPS;
+ atcs_input.req_type = ATCS_PCIE_LINK_SPEED;
+ atcs_input.perf_req = perf_req;
+
+ params.length = sizeof(struct atcs_pref_req_input);
+ params.pointer = &atcs_input;
+
+ while (retry--) {
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
+ if (!info)
+ return -EIO;
+
+ memset(&atcs_output, 0, sizeof(atcs_output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 3) {
+ DRM_INFO("ATCS buffer is too small: %zu\n", size);
+ kfree(info);
+ return -EINVAL;
+ }
+ size = min(sizeof(atcs_output), size);
+
+ memcpy(&atcs_output, info->buffer.pointer, size);
+
+ kfree(info);
+
+ switch (atcs_output.ret_val) {
+ case ATCS_REQUEST_REFUSED:
+ default:
+ return -EINVAL;
+ case ATCS_REQUEST_COMPLETE:
+ return 0;
+ case ATCS_REQUEST_IN_PROGRESS:
+ udelay(10);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_power_shift_control
+ *
+ * @adev: amdgpu_device pointer
+ * @dev_state: device acpi state
+ * @drv_state: driver state
+ *
+ * Executes the POWER_SHIFT_CONTROL method to
+ * communicate current dGPU device state and
+ * driver state to APU/SBIOS.
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct atcs_pwr_shift_input atcs_input;
+ struct acpi_buffer params;
+
+ if (!amdgpu_acpi_is_power_shift_control_supported())
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_pwr_shift_input);
+ /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+ atcs_input.dgpu_id = pci_dev_id(adev->pdev);
+ atcs_input.dev_acpi_state = dev_state;
+ atcs_input.drv_state = drv_state;
+
+ params.length = sizeof(struct atcs_pwr_shift_input);
+ params.pointer = &atcs_input;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
+ if (!info) {
+ DRM_ERROR("ATCS PSC update failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS
+ *
+ * @dev: drm_device pointer
+ * @ss_state: current smart shift event
+ *
+ * returns 0 on success,
+ * otherwise return error number.
+ */
+int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (!amdgpu_device_supports_smart_shift(dev))
+ return 0;
+
+ switch (ss_state) {
+ /* SBIOS trigger “stop”, “enable” and “start” at D0, Driver Operational.
+ * SBIOS trigger “stop” at D3, Driver Not Operational.
+ * SBIOS trigger “stop” and “disable” at D0, Driver NOT operational.
+ */
+ case AMDGPU_SS_DRV_LOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D0:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D3:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ case AMDGPU_SS_DRV_UNLOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+#ifdef CONFIG_ACPI_NUMA
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+ struct amdgpu_numa_info *numa_info;
+ int nid;
+
+ numa_info = xa_load(&numa_info_xa, pxm);
+
+ if (!numa_info) {
+ struct sysinfo info;
+
+ numa_info = kzalloc(sizeof(*numa_info), GFP_KERNEL);
+ if (!numa_info)
+ return NULL;
+
+ nid = pxm_to_node(pxm);
+ numa_info->pxm = pxm;
+ numa_info->nid = nid;
+
+ if (numa_info->nid == NUMA_NO_NODE) {
+ si_meminfo(&info);
+ numa_info->size = info.totalram * info.mem_unit;
+ } else {
+ numa_info->size = amdgpu_acpi_get_numa_size(nid);
+ }
+ xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+ }
+
+ return numa_info;
+}
+#endif
+
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @numa_info: amdgpu_numa_info structure holding numa information
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
+ */
+static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+ struct amdgpu_numa_info **numa_info)
+{
+#ifdef CONFIG_ACPI_NUMA
+ u64 pxm;
+ acpi_status status;
+
+ if (!numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+ if (ACPI_FAILURE(status))
+ return status;
+
+ *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+ if (!*numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ return_ACPI_STATUS(AE_OK);
+#else
+ return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+{
+ struct amdgpu_acpi_dev_info *acpi_dev;
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return NULL;
+
+ list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+ if (acpi_dev->bdf == bdf)
+ return acpi_dev;
+
+ return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+ struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+{
+ struct amdgpu_acpi_dev_info *tmp;
+ union acpi_object *obj;
+ int ret = -ENOENT;
+
+ *dev_info = NULL;
+ tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->xcc_list);
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->bdf = bdf;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_SUPP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
+ ACPI_FREE(obj);
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_XCP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_XCP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->xcp_mode = obj->integer.value & 0xFFFF;
+ tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_TMR_INFO, NULL,
+ ACPI_TYPE_PACKAGE);
+
+ if (!obj || obj->package.count < 2) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_TMR_INFO);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->tmr_base = obj->package.elements[0].integer.value;
+ tmp->tmr_size = obj->package.elements[1].integer.value;
+ ACPI_FREE(obj);
+
+ DRM_DEBUG_DRIVER(
+ "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
+ tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->tmr_base, tmp->tmr_size);
+ list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
+ *dev_info = tmp;
+
+ return 0;
+
+out:
+ if (obj)
+ ACPI_FREE(obj);
+ kfree(tmp);
+
+ return ret;
+}
+
+static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
+ u16 *bdf)
+{
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -ENOENT;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
+ goto out;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* PF xcc id [39:32] */
+ xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
+ /* xcp node of this xcc [47:40] */
+ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF bus/dev/fn of this xcc [63:48] */
+ *bdf = (obj->integer.value >> 48) & 0xFFFF;
+ ACPI_FREE(obj);
+ obj = NULL;
+
+ status =
+ amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
+
+ /* TODO: check if this check is required */
+ if (ACPI_SUCCESS(status))
+ ret = 0;
+out:
+ if (obj)
+ ACPI_FREE(obj);
+
+ return ret;
+}
+
+static int amdgpu_acpi_enumerate_xcc(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info = NULL;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ struct acpi_device *acpi_dev;
+ char hid[ACPI_ID_LEN];
+ int ret, id;
+ u16 bdf;
+
+ INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+ xa_init(&numa_info_xa);
+
+ for (id = 0; id < AMD_XCC_MAX_HID; id++) {
+ sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
+ acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
+ /* These ACPI objects are expected to be in sequential order. If
+ * one is not found, no need to check the rest.
+ */
+ if (!acpi_dev) {
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ hid);
+ break;
+ }
+
+ xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
+ GFP_KERNEL);
+ if (!xcc_info) {
+ DRM_ERROR("Failed to allocate memory for xcc info\n");
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&xcc_info->list);
+ xcc_info->handle = acpi_device_handle(acpi_dev);
+ acpi_dev_put(acpi_dev);
+
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
+ if (ret) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ dev_info = amdgpu_acpi_get_dev(bdf);
+
+ if (!dev_info)
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
+
+ if (ret == -ENOMEM)
+ return ret;
+
+ if (!dev_info) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ list_add_tail(&xcc_info->list, &dev_info->xcc_list);
+ }
+
+ return 0;
+}
+
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ u16 bdf;
+
+ if (!tmr_offset || !tmr_size)
+ return -EINVAL;
+
+ bdf = pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(bdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ *tmr_offset = dev_info->tmr_base;
+ *tmr_size = dev_info->tmr_size;
+
+ return 0;
+}
+
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ u16 bdf;
+
+ if (!numa_info)
+ return -EINVAL;
+
+ bdf = pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(bdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+ if (xcc_info->phy_id == xcc_id) {
+ memcpy(numa_info, xcc_info->numa_info,
+ sizeof(*numa_info));
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * amdgpu_acpi_event - handle notify events
+ *
+ * @nb: notifier block
+ * @val: val
+ * @data: acpi event
+ *
+ * Calls relevant amdgpu functions in response to various
+ * acpi events.
+ * Returns NOTIFY code
+ */
+static int amdgpu_acpi_event(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, acpi_nb);
+ struct acpi_bus_event *entry = (struct acpi_bus_event *)data;
+
+ if (strcmp(entry->device_class, ACPI_AC_CLASS) == 0) {
+ if (power_supply_is_system_supplied() > 0)
+ DRM_DEBUG_DRIVER("pm: AC\n");
+ else
+ DRM_DEBUG_DRIVER("pm: DC\n");
+
+ amdgpu_pm_acpi_event_handler(adev);
+ }
+
+ /* Check for pending SBIOS requests */
+ return amdgpu_atif_handler(adev, entry);
+}
+
+/* Call all ACPI methods here */
+/**
+ * amdgpu_acpi_init - init driver acpi support
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Verifies the AMD ACPI interfaces and registers with the acpi
+ * notifier chain (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+
+ if (atif->notifications.brightness_change) {
+ if (adev->dc_enabled) {
+#if defined(CONFIG_DRM_AMD_DC)
+ struct amdgpu_display_manager *dm = &adev->dm;
+
+ if (dm->backlight_dev[0])
+ atif->bd = dm->backlight_dev[0];
+#endif
+ } else {
+ struct drm_encoder *tmp;
+
+ /* Find the encoder controlling the brightness */
+ list_for_each_entry(tmp, &adev_to_drm(adev)->mode_config.encoder_list,
+ head) {
+ struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
+
+ if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
+ enc->enc_priv) {
+ struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
+ if (dig->bl_dev) {
+ atif->bd = dig->bl_dev;
+ break;
+ }
+ }
+ }
+ }
+ }
+ adev->acpi_nb.notifier_call = amdgpu_acpi_event;
+ register_acpi_notifier(&adev->acpi_nb);
+
+ return 0;
+}
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+
+ caps->caps_valid = atif->backlight_caps.caps_valid;
+ caps->min_input_signal = atif->backlight_caps.min_input_signal;
+ caps->max_input_signal = atif->backlight_caps.max_input_signal;
+}
+
+/**
+ * amdgpu_acpi_fini - tear down driver acpi support
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Unregisters with the acpi notifier chain (all asics).
+ */
+void amdgpu_acpi_fini(struct amdgpu_device *adev)
+{
+ unregister_acpi_notifier(&adev->acpi_nb);
+}
+
+/**
+ * amdgpu_atif_pci_probe_handle - look up the ATIF handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATIF handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+ acpi_handle dhandle, atif_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATIF", &atif_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atif.handle = atif_handle;
+ acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
+ ret = amdgpu_atif_verify_interface(&amdgpu_acpi_priv.atif);
+ if (ret) {
+ amdgpu_acpi_priv.atif.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_atcs_pci_probe_handle - look up the ATCS handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATCS handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
+ acpi_handle dhandle, atcs_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATCS", &atcs_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atcs.handle = atcs_handle;
+ acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
+ ret = amdgpu_atcs_verify_interface(&amdgpu_acpi_priv.atcs);
+ if (ret) {
+ amdgpu_acpi_priv.atcs.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
+ return false;
+
+ if ((adev->flags & AMD_IS_APU) &&
+ amdgpu_acpi_is_s3_active(adev))
+ return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+#else
+ return true;
+#endif
+}
+
+/*
+ * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods
+ *
+ * Check if we have the ATIF/ATCS methods and populate
+ * the structures in the driver.
+ */
+void amdgpu_acpi_detect(void)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct pci_dev *pdev = NULL;
+ int ret;
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ if (!atif->handle)
+ amdgpu_atif_pci_probe_handle(pdev);
+ if (!atcs->handle)
+ amdgpu_atcs_pci_probe_handle(pdev);
+ }
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
+ if (!atif->handle)
+ amdgpu_atif_pci_probe_handle(pdev);
+ if (!atcs->handle)
+ amdgpu_atcs_pci_probe_handle(pdev);
+ }
+
+ if (atif->functions.sbios_requests && !atif->functions.system_params) {
+ /* XXX check this workraround, if sbios request function is
+ * present we have to see how it's configured in the system
+ * params
+ */
+ atif->functions.system_params = true;
+ }
+
+ if (atif->functions.system_params) {
+ ret = amdgpu_atif_get_notification_params(atif);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n",
+ ret);
+ /* Disable notification */
+ atif->notification_cfg.enabled = false;
+ }
+ }
+
+ if (atif->functions.query_backlight_transfer_characteristics) {
+ ret = amdgpu_atif_query_backlight_caps(atif);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
+ ret);
+ atif->backlight_caps.caps_valid = false;
+ }
+ } else {
+ atif->backlight_caps.caps_valid = false;
+ }
+
+ amdgpu_acpi_enumerate_xcc();
+}
+
+void amdgpu_acpi_release(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
+ struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+ struct amdgpu_numa_info *numa_info;
+ unsigned long index;
+
+ xa_for_each(&numa_info_xa, index, numa_info) {
+ kfree(numa_info);
+ xa_erase(&numa_info_xa, index);
+ }
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return;
+
+ list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
+ list) {
+ list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
+ list) {
+ list_del(&xcc_info->list);
+ kfree(xcc_info);
+ }
+
+ list_del(&dev_info->list);
+ kfree(dev_info);
+ }
+}
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+/**
+ * amdgpu_acpi_is_s3_active
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+{
+ return !(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state == PM_SUSPEND_MEM);
+}
+
+/**
+ * amdgpu_acpi_is_s0ix_active
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+ return false;
+
+ if (adev->asic_type < CHIP_RAVEN)
+ return false;
+
+ /*
+ * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
+ * risky to do any special firmware-related preparations for entering
+ * S0ix even though the system is suspending to idle, so return false
+ * in that case.
+ */
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
+ }
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
+ return false;
+#else
+ return true;
+#endif /* CONFIG_AMD_PMC */
+}
+
+#endif /* CONFIG_SUSPEND */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
new file mode 100644
index 000000000..a4d65973b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Christian König.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#include <linux/hdmi.h>
+#include <linux/gcd.h>
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+
+static const struct amdgpu_afmt_acr amdgpu_afmt_predefined_acr[] = {
+ /* 32kHz 44.1kHz 48kHz */
+ /* Clock N CTS N CTS N CTS */
+ { 25175, 4096, 25175, 28224, 125875, 6144, 25175 }, /* 25,20/1.001 MHz */
+ { 25200, 4096, 25200, 6272, 28000, 6144, 25200 }, /* 25.20 MHz */
+ { 27000, 4096, 27000, 6272, 30000, 6144, 27000 }, /* 27.00 MHz */
+ { 27027, 4096, 27027, 6272, 30030, 6144, 27027 }, /* 27.00*1.001 MHz */
+ { 54000, 4096, 54000, 6272, 60000, 6144, 54000 }, /* 54.00 MHz */
+ { 54054, 4096, 54054, 6272, 60060, 6144, 54054 }, /* 54.00*1.001 MHz */
+ { 74176, 4096, 74176, 5733, 75335, 6144, 74176 }, /* 74.25/1.001 MHz */
+ { 74250, 4096, 74250, 6272, 82500, 6144, 74250 }, /* 74.25 MHz */
+ { 148352, 4096, 148352, 5733, 150670, 6144, 148352 }, /* 148.50/1.001 MHz */
+ { 148500, 4096, 148500, 6272, 165000, 6144, 148500 }, /* 148.50 MHz */
+};
+
+
+/*
+ * calculate CTS and N values if they are not found in the table
+ */
+static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, int *N, int freq)
+{
+ int n, cts;
+ unsigned long div, mul;
+
+ /* Safe, but overly large values */
+ n = 128 * freq;
+ cts = clock * 1000;
+
+ /* Smallest valid fraction */
+ div = gcd(n, cts);
+
+ n /= div;
+ cts /= div;
+
+ /*
+ * The optimal N is 128*freq/1000. Calculate the closest larger
+ * value that doesn't truncate any bits.
+ */
+ mul = ((128*freq/1000) + (n-1))/n;
+
+ n *= mul;
+ cts *= mul;
+
+ /* Check that we are in spec (not always possible) */
+ if (n < (128*freq/1500))
+ pr_warn("Calculated ACR N value is too small. You may experience audio problems.\n");
+ if (n > (128*freq/300))
+ pr_warn("Calculated ACR N value is too large. You may experience audio problems.\n");
+
+ *N = n;
+ *CTS = cts;
+
+ DRM_DEBUG("Calculated ACR timing N=%d CTS=%d for frequency %d\n",
+ *N, *CTS, freq);
+}
+
+struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
+{
+ struct amdgpu_afmt_acr res;
+ u8 i;
+
+ /* Precalculated values for common clocks */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_afmt_predefined_acr); i++) {
+ if (amdgpu_afmt_predefined_acr[i].clock == clock)
+ return amdgpu_afmt_predefined_acr[i];
+ }
+
+ /* And odd clocks get manually calculated */
+ amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
+ amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
+ amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+
+ return res;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
new file mode 100644
index 000000000..25d5fda5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -0,0 +1,868 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_amdkfd.h"
+#include "amd_pcie.h"
+#include "amd_shared.h"
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
+#include <linux/module.h>
+#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
+
+/* Total memory size in system memory and all GPU VRAM. Used to
+ * estimate worst case amount of memory to reserve for page tables
+ */
+uint64_t amdgpu_amdkfd_total_mem_size;
+
+static bool kfd_initialized;
+
+int amdgpu_amdkfd_init(void)
+{
+ struct sysinfo si;
+ int ret;
+
+ si_meminfo(&si);
+ amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
+ amdgpu_amdkfd_total_mem_size *= si.mem_unit;
+
+ ret = kgd2kfd_init();
+ kfd_initialized = !ret;
+
+ return ret;
+}
+
+void amdgpu_amdkfd_fini(void)
+{
+ if (kfd_initialized) {
+ kgd2kfd_exit();
+ kfd_initialized = false;
+ }
+}
+
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
+{
+ bool vf = amdgpu_sriov_vf(adev);
+
+ if (!kfd_initialized)
+ return;
+
+ adev->kfd.dev = kgd2kfd_probe(adev, vf);
+}
+
+/**
+ * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
+ * setup amdkfd
+ *
+ * @adev: amdgpu_device pointer
+ * @aperture_base: output returning doorbell aperture base physical address
+ * @aperture_size: output returning doorbell aperture size in bytes
+ * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
+ *
+ * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
+ * takes doorbells required for its own rings and reports the setup to amdkfd.
+ * amdgpu reserved doorbells are at the start of the doorbell aperture.
+ */
+static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
+ phys_addr_t *aperture_base,
+ size_t *aperture_size,
+ size_t *start_offset)
+{
+ /*
+ * The first num_kernel_doorbells are used by amdgpu.
+ * amdkfd takes whatever's left in the aperture.
+ */
+ if (adev->enable_mes) {
+ /*
+ * With MES enabled, we only need to initialize
+ * the base address. The size and offset are
+ * not initialized as AMDGPU manages the whole
+ * doorbell space.
+ */
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = 0;
+ *start_offset = 0;
+ } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
+ sizeof(u32)) {
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = adev->doorbell.size;
+ *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
+ } else {
+ *aperture_base = 0;
+ *aperture_size = 0;
+ *start_offset = 0;
+ }
+}
+
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
+{
+ int i;
+ int last_valid_bit;
+
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
+
+ if (adev->kfd.dev) {
+ struct kgd2kfd_shared_resources gpu_resources = {
+ .compute_vmid_bitmap =
+ ((1 << AMDGPU_NUM_VMID) - 1) -
+ ((1 << adev->vm_manager.first_kfd_vmid) - 1),
+ .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+ .gpuvm_size = min(adev->vm_manager.max_pfn
+ << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GMC_HOLE_START),
+ .drm_render_minor = adev_to_drm(adev)->render->index,
+ .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+ .enable_mes = adev->enable_mes,
+ };
+
+ /* this is going to have a few of the MSBs set that we need to
+ * clear
+ */
+ bitmap_complement(gpu_resources.cp_queue_bitmap,
+ adev->gfx.mec_bitmap[0].queue_bitmap,
+ KGD_MAX_QUEUES);
+
+ /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+ * nbits is not compile time constant
+ */
+ last_valid_bit = 1 /* only first MEC can have compute queues */
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ clear_bit(i, gpu_resources.cp_queue_bitmap);
+
+ amdgpu_doorbell_get_kfd_info(adev,
+ &gpu_resources.doorbell_physical_address,
+ &gpu_resources.doorbell_aperture_size,
+ &gpu_resources.doorbell_start_offset);
+
+ /* Since SOC15, BIF starts to statically use the
+ * lower 12 bits of doorbell addresses for routing
+ * based on settings in registers like
+ * SDMA0_DOORBELL_RANGE etc..
+ * In order to route a doorbell to CP engine, the lower
+ * 12 bits of its address has to be outside the range
+ * set for SDMA, VCN, and IH blocks.
+ */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ gpu_resources.non_cp_doorbells_start =
+ adev->doorbell_index.first_non_cp;
+ gpu_resources.non_cp_doorbells_end =
+ adev->doorbell_index.last_non_cp;
+ }
+
+ adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
+ &gpu_resources);
+
+ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
+ }
+}
+
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
+{
+ if (adev->kfd.dev) {
+ kgd2kfd_device_exit(adev->kfd.dev);
+ adev->kfd.dev = NULL;
+ amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
+ }
+}
+
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
+ const void *ih_ring_entry)
+{
+ if (adev->kfd.dev)
+ kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
+}
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
+{
+ if (adev->kfd.dev)
+ kgd2kfd_suspend(adev->kfd.dev, run_pm);
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_resume(adev->kfd.dev, run_pm);
+
+ return r;
+}
+
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_pre_reset(adev->kfd.dev);
+
+ return r;
+}
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_post_reset(adev->kfd.dev);
+
+ return r;
+}
+
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
+}
+
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr, bool cp_mqd_gfx9)
+{
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
+ int r;
+ void *cpu_ptr_tmp = NULL;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ if (cp_mqd_gfx9)
+ bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
+
+ r = amdgpu_bo_create(adev, &bp, &bo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ /* map the buffer */
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+ goto allocate_mem_reserve_bo_failed;
+ }
+
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+ goto allocate_mem_pin_bo_failed;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", bo);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
+ r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) failed to map bo to kernel for amdkfd\n", r);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
+ *mem_obj = bo;
+ *gpu_addr = amdgpu_bo_gpu_offset(bo);
+ *cpu_ptr = cpu_ptr_tmp;
+
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+allocate_mem_kmap_bo_failed:
+ amdgpu_bo_unpin(bo);
+allocate_mem_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+allocate_mem_reserve_bo_failed:
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
+{
+ struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
+
+ amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&(bo));
+}
+
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj)
+{
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_user *ubo;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = 1;
+ bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+ bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate gws BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ bo = &ubo->bo;
+ *mem_obj = bo;
+ return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
+{
+ struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+ amdgpu_bo_unref(&bo);
+}
+
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
+ enum kgd_engine_type type)
+{
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ return adev->gfx.pfp_fw_version;
+
+ case KGD_ENGINE_ME:
+ return adev->gfx.me_fw_version;
+
+ case KGD_ENGINE_CE:
+ return adev->gfx.ce_fw_version;
+
+ case KGD_ENGINE_MEC1:
+ return adev->gfx.mec_fw_version;
+
+ case KGD_ENGINE_MEC2:
+ return adev->gfx.mec2_fw_version;
+
+ case KGD_ENGINE_RLC:
+ return adev->gfx.rlc_fw_version;
+
+ case KGD_ENGINE_SDMA1:
+ return adev->sdma.instance[0].fw_version;
+
+ case KGD_ENGINE_SDMA2:
+ return adev->sdma.instance[1].fw_version;
+
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp)
+{
+ memset(mem_info, 0, sizeof(*mem_info));
+
+ if (xcp) {
+ if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
+ mem_info->local_mem_size_public =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ else
+ mem_info->local_mem_size_private =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ adev->gmc.visible_vram_size;
+ }
+ mem_info->vram_width = adev->gmc.vram_width;
+
+ pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
+ &adev->gmc.aper_base,
+ mem_info->local_mem_size_public,
+ mem_info->local_mem_size_private);
+
+ if (adev->pm.dpm_enabled) {
+ if (amdgpu_emu_mode == 1)
+ mem_info->mem_clk_max = 0;
+ else
+ mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
+ } else
+ mem_info->mem_clk_max = 100;
+}
+
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ if (adev->gfx.funcs->get_gpu_clock_counter)
+ return adev->gfx.funcs->get_gpu_clock_counter(adev);
+ return 0;
+}
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
+{
+ /* the sclk is in quantas of 10kHz */
+ if (adev->pm.dpm_enabled)
+ return amdgpu_dpm_get_sclk(adev, false) / 100;
+ else
+ return 100;
+}
+
+void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
+{
+ struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+ if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+ return;
+
+ cu_info->cu_active_number = acu_info.number;
+ cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+ memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+ sizeof(cu_info->cu_bitmap));
+ cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+ cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+ cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+ cu_info->simd_per_cu = acu_info.simd_per_cu;
+ cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+ cu_info->wave_front_size = acu_info.wave_front_size;
+ cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+ cu_info->lds_size = acu_info.lds_size;
+}
+
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint32_t *flags, int8_t *xcp_id)
+{
+ struct dma_buf *dma_buf;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ uint64_t metadata_flags;
+ int r = -EINVAL;
+
+ dma_buf = dma_buf_get(dma_buf_fd);
+ if (IS_ERR(dma_buf))
+ return PTR_ERR(dma_buf);
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* Can't handle non-graphics buffers */
+ goto out_put;
+
+ obj = dma_buf->priv;
+ if (obj->dev->driver != adev_to_drm(adev)->driver)
+ /* Can't handle buffers from different drivers */
+ goto out_put;
+
+ adev = drm_to_adev(obj->dev);
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT)))
+ /* Only VRAM and GTT BOs are supported */
+ goto out_put;
+
+ r = 0;
+ if (dmabuf_adev)
+ *dmabuf_adev = adev;
+ if (bo_size)
+ *bo_size = amdgpu_bo_size(bo);
+ if (metadata_buffer)
+ r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+ metadata_size, &metadata_flags);
+ if (flags) {
+ *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM
+ : KFD_IOC_ALLOC_MEM_FLAGS_GTT;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
+ }
+ if (xcp_id)
+ *xcp_id = bo->xcp_id;
+
+out_put:
+ dma_buf_put(dma_buf);
+ return r;
+}
+
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
+ struct amdgpu_device *src)
+{
+ struct amdgpu_device *peer_adev = src;
+ struct amdgpu_device *adev = dst;
+ int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+ if (ret < 0) {
+ DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+ adev->gmc.xgmi.physical_node_id,
+ peer_adev->gmc.xgmi.physical_node_id, ret);
+ ret = 0;
+ }
+ return (uint8_t)ret;
+}
+
+int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
+ struct amdgpu_device *src,
+ bool is_min)
+{
+ struct amdgpu_device *adev = dst, *peer_adev;
+ int num_links;
+
+ if (adev->asic_type != CHIP_ALDEBARAN)
+ return 0;
+
+ if (src)
+ peer_adev = src;
+
+ /* num links returns 0 for indirect peers since indirect route is unknown. */
+ num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
+ if (num_links < 0) {
+ DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
+ adev->gmc.xgmi.physical_node_id,
+ peer_adev->gmc.xgmi.physical_node_id, num_links);
+ num_links = 0;
+ }
+
+ /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
+ return (num_links * 16 * 25000)/BITS_PER_BYTE;
+}
+
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
+{
+ int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
+ fls(adev->pm.pcie_mlw_mask)) - 1;
+ int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
+ fls(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
+ uint32_t num_lanes_mask = 1 << num_lanes_shift;
+ uint32_t gen_speed_mask = 1 << gen_speed_shift;
+ int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
+
+ switch (num_lanes_mask) {
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
+ num_lanes_factor = 1;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
+ num_lanes_factor = 2;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
+ num_lanes_factor = 4;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
+ num_lanes_factor = 8;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
+ num_lanes_factor = 12;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
+ num_lanes_factor = 16;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
+ num_lanes_factor = 32;
+ break;
+ }
+
+ switch (gen_speed_mask) {
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
+ gen_speed_mbits_factor = 2500;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
+ gen_speed_mbits_factor = 5000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
+ gen_speed_mbits_factor = 8000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
+ gen_speed_mbits_factor = 16000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
+ gen_speed_mbits_factor = 32000;
+ break;
+ }
+
+ return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
+}
+
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len)
+{
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct amdgpu_ring *ring;
+ struct dma_fence *f = NULL;
+ int ret;
+
+ switch (engine) {
+ case KGD_ENGINE_MEC1:
+ ring = &adev->gfx.compute_ring[0];
+ break;
+ case KGD_ENGINE_SDMA1:
+ ring = &adev->sdma.instance[0].ring;
+ break;
+ case KGD_ENGINE_SDMA2:
+ ring = &adev->sdma.instance[1].ring;
+ break;
+ default:
+ pr_err("Invalid engine in IB submission: %d\n", engine);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job);
+ if (ret)
+ goto err;
+
+ ib = &job->ibs[0];
+ memset(ib, 0, sizeof(struct amdgpu_ib));
+
+ ib->gpu_addr = gpu_addr;
+ ib->ptr = ib_cmd;
+ ib->length_dw = ib_len;
+ /* This works for NO_HWS. TODO: need to handle without knowing VMID */
+ job->vmid = vmid;
+ job->num_ibs = 1;
+
+ ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+
+ if (ret) {
+ DRM_ERROR("amdgpu: failed to schedule IB.\n");
+ goto err_ib_sched;
+ }
+
+ /* Drop the initial kref_init count (see drm_sched_main as example) */
+ dma_fence_put(f);
+ ret = dma_fence_wait(f, false);
+
+err_ib_sched:
+ amdgpu_job_free(job);
+err:
+ return ret;
+}
+
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
+{
+ /* Temporary workaround to fix issues observed in some
+ * compute applications when GFXOFF is enabled on GFX11.
+ */
+ if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) {
+ pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
+ amdgpu_gfx_off_ctrl(adev, idle);
+ }
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE,
+ !idle);
+}
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+{
+ if (adev->kfd.dev)
+ return vmid >= adev->vm_manager.first_kfd_vmid;
+
+ return false;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
+ uint16_t vmid)
+{
+ if (adev->family == AMDGPU_FAMILY_AI) {
+ int i;
+
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+ } else {
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
+ }
+
+ return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid,
+ enum TLB_FLUSH_TYPE flush_type,
+ uint32_t inst)
+{
+ bool all_hub = false;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
+}
+
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
+{
+ return adev->have_atomics_support;
+}
+
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+ amdgpu_device_flush_hdp(adev, NULL);
+}
+
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
+{
+ amdgpu_umc_poison_handler(adev, reset);
+}
+
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload)
+{
+ int ret;
+
+ /* Device or IH ring is not ready so bail. */
+ ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+ if (ret)
+ return ret;
+
+ /* Send payload to fence KFD interrupts */
+ amdgpu_amdkfd_interrupt(adev, payload);
+
+ return 0;
+}
+
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+{
+ if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
+ return adev->gfx.ras->query_utcl2_poison_status(adev);
+ else
+ return false;
+}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd();
+}
+
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd();
+}
+
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
+{
+ u64 tmp;
+ s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+
+ if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+ return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else {
+ return adev->gmc.real_vram_size;
+ }
+}
+
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ struct amdgpu_ring_funcs *ring_funcs;
+ struct amdgpu_ring *ring;
+ int r = 0;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
+ if (!ring_funcs)
+ return -ENOMEM;
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ r = -ENOMEM;
+ goto free_ring_funcs;
+ }
+
+ ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
+ ring->doorbell_index = doorbell_off;
+ ring->funcs = ring_funcs;
+
+ spin_lock(&kiq->ring_lock);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock(&kiq->ring_lock);
+ r = -ENOMEM;
+ goto free_ring;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
+
+ if (kiq_ring->sched.ready && !adev->job_hang)
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+free_ring:
+ kfree(ring);
+
+free_ring_funcs:
+ kfree(ring_funcs);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
new file mode 100644
index 000000000..2fe986072
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -0,0 +1,479 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
+
+#ifndef AMDGPU_AMDKFD_H_INCLUDED
+#define AMDGPU_AMDKFD_H_INCLUDED
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
+#include <kgd_kfd_interface.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
+
+extern uint64_t amdgpu_amdkfd_total_mem_size;
+
+enum TLB_FLUSH_TYPE {
+ TLB_FLUSH_LEGACY = 0,
+ TLB_FLUSH_LIGHTWEIGHT,
+ TLB_FLUSH_HEAVYWEIGHT
+};
+
+struct amdgpu_device;
+
+enum kfd_mem_attachment_type {
+ KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
+ KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
+ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
+ KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
+};
+
+struct kfd_mem_attachment {
+ struct list_head list;
+ enum kfd_mem_attachment_type type;
+ bool is_mapped;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_device *adev;
+ uint64_t va;
+ uint64_t pte_flags;
+};
+
+struct kgd_mem {
+ struct mutex lock;
+ struct amdgpu_bo *bo;
+ struct dma_buf *dmabuf;
+ struct hmm_range *range;
+ struct list_head attachments;
+ /* protected by amdkfd_process_info.lock */
+ struct list_head validate_list;
+ uint32_t domain;
+ unsigned int mapped_to_gpu_memory;
+ uint64_t va;
+
+ uint32_t alloc_flags;
+
+ uint32_t invalid;
+ struct amdkfd_process_info *process_info;
+
+ struct amdgpu_sync sync;
+
+ bool aql_queue;
+ bool is_imported;
+};
+
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+ struct dma_fence base;
+ struct mm_struct *mm;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+ struct svm_range_bo *svm_bo;
+};
+
+struct amdgpu_kfd_dev {
+ struct kfd_dev *dev;
+ int64_t vram_used[MAX_XCP];
+ uint64_t vram_used_aligned[MAX_XCP];
+ bool init_complete;
+ struct work_struct reset_work;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+ struct dev_pagemap pgmap;
+};
+
+enum kgd_engine_type {
+ KGD_ENGINE_PFP = 1,
+ KGD_ENGINE_ME,
+ KGD_ENGINE_CE,
+ KGD_ENGINE_MEC1,
+ KGD_ENGINE_MEC2,
+ KGD_ENGINE_RLC,
+ KGD_ENGINE_SDMA1,
+ KGD_ENGINE_SDMA2,
+ KGD_ENGINE_MAX
+};
+
+
+struct amdkfd_process_info {
+ /* List head of all VMs that belong to a KFD process */
+ struct list_head vm_list_head;
+ /* List head for all KFD BOs that belong to a KFD process. */
+ struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
+ /* Lock to protect kfd_bo_list */
+ struct mutex lock;
+
+ /* Number of VMs */
+ unsigned int n_vms;
+ /* Eviction Fence */
+ struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ struct mutex notifier_lock;
+ uint32_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
+ bool block_mmu_notifications;
+};
+
+int amdgpu_amdkfd_init(void);
+void amdgpu_amdkfd_fini(void);
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
+ const void *ih_ring_entry);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len);
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
+ uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
+ uint32_t inst);
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
+
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev);
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit);
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo);
+#if defined(CONFIG_DEBUG_FS)
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
+#endif
+#if IS_ENABLED(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem);
+#else
+static inline
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ return false;
+}
+
+static inline
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ return NULL;
+}
+
+static inline
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+ return 0;
+}
+
+static inline
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ return 0;
+}
+#endif
+/* Shared API */
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr, bool mqd_gfx9);
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
+ enum kgd_engine_type type);
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp);
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
+void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev,
+ struct kfd_cu_info *cu_info);
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint32_t *flags, int8_t *xcp_id);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
+ struct amdgpu_device *src);
+int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
+ struct amdgpu_device *src,
+ bool is_min);
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload);
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst);
+
+/* Read user wptr from a specified user address space with page fault
+ * disabled. The memory must be pinned and mapped to the hardware when
+ * this is called in hqd_load functions, so it should never fault in
+ * the first place. This resolves a circular lock dependency involving
+ * four locks, including the DQM lock and mmap_lock.
+ */
+#define read_user_wptr(mmptr, wptr, dst) \
+ ({ \
+ bool valid = false; \
+ if ((mmptr) && (wptr)) { \
+ pagefault_disable(); \
+ if ((mmptr) == current->mm) { \
+ valid = !get_user((dst), (wptr)); \
+ } else if (current->flags & PF_KTHREAD) { \
+ kthread_use_mm(mmptr); \
+ valid = !get_user((dst), (wptr)); \
+ kthread_unuse_mm(mmptr); \
+ } \
+ pagefault_enable(); \
+ } \
+ valid; \
+ })
+
+/* GPUVM API */
+#define drm_priv_to_vm(drm_priv) \
+ (&((struct amdgpu_fpriv *) \
+ ((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm, u32 pasid);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
+ struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
+ void *drm_priv);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume);
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
+ struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
+ struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *info);
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
+ struct dma_buf *dmabuf,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
+ struct tile_config *config);
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ bool reset);
+bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+
+#define KFD_XCP_MEM_ID(adev, xcp_id) \
+ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
+ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
+
+#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
+
+
+#if IS_ENABLED(CONFIG_HSA_AMD)
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
+#else
+static inline
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+}
+
+static inline
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+}
+
+static inline
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
+#else
+static inline
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
+
+/* KGD2KFD callbacks */
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+ struct dma_fence *fence);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+int kgd2kfd_init(void);
+void kgd2kfd_exit(void);
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ const struct kgd2kfd_shared_resources *gpu_resources);
+void kgd2kfd_device_exit(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+int kgd2kfd_post_reset(struct kfd_dev *kfd);
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(void);
+void kgd2kfd_unlock_kfd(void);
+#else
+static inline int kgd2kfd_init(void)
+{
+ return -ENOENT;
+}
+
+static inline void kgd2kfd_exit(void)
+{
+}
+
+static inline
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+ return NULL;
+}
+
+static inline
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ const struct kgd2kfd_shared_resources *gpu_resources)
+{
+ return false;
+}
+
+static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+}
+
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+{
+}
+
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_post_reset(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+}
+
+static inline
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
+
+static inline
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
+{
+}
+
+static inline int kgd2kfd_check_and_lock_kfd(void)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(void)
+{
+}
+#endif
+#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
new file mode 100644
index 000000000..aff08321e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_aldebaran_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+const struct kfd2kgd_calls aldebaran_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+ .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index 000000000..a7bdaf8d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
new file mode 100644
index 000000000..625db444d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_reset.h"
+#include "sdma0/sdma0_4_2_2_offset.h"
+#include "sdma0/sdma0_4_2_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_2_offset.h"
+#include "sdma1/sdma1_4_2_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+ break;
+ case 4:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+ break;
+ case 5:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+ break;
+ case 6:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+ break;
+ case 7:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+/*
+ * Helper used to suspend/resume gfx pipe for image post process work to set
+ * barrier behaviour.
+ */
+static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (!(ring && ring->sched.thread))
+ continue;
+
+ /* stop secheduler and drain ring. */
+ if (suspend) {
+ drm_sched_stop(&ring->sched, NULL);
+ r = amdgpu_fence_wait_empty(ring);
+ if (r)
+ goto out;
+ } else {
+ drm_sched_start(&ring->sched, false);
+ }
+ }
+
+out:
+ /* return on resume or failure to drain rings. */
+ if (!suspend || r)
+ return r;
+
+ return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
+}
+
+static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
+{
+ uint32_t data;
+
+ WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ amdgpu_amdkfd_suspend(adev, false);
+
+ if (suspend_resume_compute_scheduler(adev, true))
+ goto out;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
+ data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !enable_waitcnt);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
+
+out:
+ suspend_resume_compute_scheduler(adev, false);
+
+ amdgpu_amdkfd_resume(adev, false);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, false);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+const struct kfd2kgd_calls arcturus_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_arcturus_enable_debug_trap,
+ .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
new file mode 100644
index 000000000..756c1a567
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd);
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644
index 000000000..469785d33
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ * evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_device_funcs->eviction_valuable().
+ *
+ * ttm_device_funcs->eviction_valuable() - will return false if the BO belongs
+ * to process X. Otherwise, it will return true to indicate BO can be
+ * evicted by TTM.
+ *
+ * If ttm_device_funcs->eviction_valuable returns true, then TTM will continue
+ * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ * nofity when the BO is free to move. fence_add_callback --> enable_signaling
+ * --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
+ * user queues and signal fence. The work item will also start another delayed
+ * work item to restore BOs
+ */
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (fence == NULL)
+ return NULL;
+
+ /* This reference gets released in amdkfd_fence_release */
+ mmgrab(mm);
+ fence->mm = mm;
+ get_task_comm(fence->timeline_name, current);
+ spin_lock_init(&fence->lock);
+ fence->svm_bo = svm_bo;
+ dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+ context, atomic_inc_return(&fence_seq));
+
+ return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ if (!f)
+ return NULL;
+
+ fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+ if (fence && f->ops == &amdkfd_fence_ops)
+ return fence;
+
+ return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ * a KFD BO and schedules a job to move the BO.
+ * If fence is already signaled return true.
+ * If fence is not signaled schedule a evict KFD process work item.
+ *
+ * @f: dma_fence
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+
+ if (dma_fence_is_signaled(f))
+ return true;
+
+ if (!fence->svm_bo) {
+ if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+ return true;
+ } else {
+ if (!svm_range_schedule_evict_svm_bo(fence))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdkfd_fence_release - callback that fence can be freed
+ *
+ * @f: dma_fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and RCU schedules freeing up the fence.
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ /* Unconditionally signal the fence. The process is getting
+ * terminated.
+ */
+ if (WARN_ON(!fence))
+ return; /* Not an amdgpu_amdkfd_fence */
+
+ mmdrop(fence->mm);
+ kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ *
+ * Check if @mm is same as that of the fence @f, if same return TRUE else
+ * return FALSE.
+ * For svm bo, which support vram overcommitment, always return FALSE.
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+ else if (fence->mm == mm && !fence->svm_bo)
+ return true;
+
+ return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+ .get_driver_name = amdkfd_fence_get_driver_name,
+ .get_timeline_name = amdkfd_fence_get_timeline_name,
+ .enable_signaling = amdkfd_fence_enable_signaling,
+ .release = amdkfd_fence_release,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
new file mode 100644
index 000000000..490c8f5dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "athub/athub_1_8_0_offset.h"
+#include "athub/athub_1_8_0_sh_mask.h"
+#include "oss/osssys_4_4_2_offset.h"
+#include "oss/osssys_4_4_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base =
+ SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
+ regSDMA_RLC0_RB_CNTL) -
+ regSDMA_RLC0_RB_CNTL;
+ uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+ return retval;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
+ reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_MIDCMD_DATA0;
+ reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+ temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
+ SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
+ u32 pasid, unsigned int vmid, uint32_t xcc_inst)
+{
+ unsigned long timeout;
+ unsigned int reg;
+ unsigned int phy_inst = GET_INST(GC, xcc_inst);
+ /* Every two XCCs share one AID */
+ unsigned int aid = phy_inst / 2;
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
+
+ timeout = jiffies + msecs_to_jiffies(10);
+ while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid))) {
+ if (time_after(jiffies, timeout)) {
+ pr_err("Fail to program VMID-PASID mapping\n");
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
+ /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs,
+ * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1"
+ * programs _LUT for XCC and "aid * 4" for AID where the XCC connects
+ * to.
+ */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4 + (phy_inst % 2) + 1);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
+
+ return 0;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, hqd_end, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+ hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
+
+ for (reg = hqd_base; reg <= hqd_end; reg++)
+ WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL),
+ data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
+ queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v9_4_3_validate_trap_override_request(
+ struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override(
+ struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v9_4_3_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_4_3_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings =
+ kgd_gfx_v9_program_trap_handler_settings,
+ .build_grace_period_packet_info =
+ kgd_gfx_v9_build_grace_period_packet_info,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
+ .validate_trap_override_request =
+ kgd_gfx_v9_4_3_validate_trap_override_request,
+ .set_wave_launch_trap_override =
+ kgd_gfx_v9_4_3_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
new file mode 100644
index 000000000..69810b3f1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -0,0 +1,1055 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
+
+ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+#if 0
+ /* TODO: uncomment this code when the hardware support is ready. */
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ pr_debug("ATHUB mapping update finished\n");
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+#endif
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("update mapping for IH block and mmhub");
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ /* On gfx10, mmSDMA1_xxx registers are defined NOT based
+ * on SDMA1 base address (dw 0x1860) but based on SDMA0
+ * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
+ * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
+ * below
+ */
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
+ };
+
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+
+ return retval;
+}
+
+#if 0
+static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
+{
+ uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
+ mmTCP_WATCH0_ADDR_H;
+
+ pr_debug("kfd: reg watch base address: 0x%x\n", retval);
+
+ return retval;
+}
+#endif
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uint64_t)wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+#if 0
+ unsigned long flags;
+ int retry;
+#endif
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+#if 0 /* Is this still needed? */
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+#endif
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* SDMA is on gfxhub as well for Navi1* series */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* assume gfx off is disabled for the debug session if rlc restore not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ /*
+ * The CP cannont handle a 0 grace period input and will result in
+ * an infinite grace period being set so set to 1 to prevent this.
+ */
+ if (grace_period == 0)
+ grace_period = 1;
+
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ grace_period);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+static void program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hiq_mqd_load = kgd_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .program_trap_handler_settings = program_trap_handler_settings,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
new file mode 100644
index 000000000..67bcaa3d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
new file mode 100644
index 000000000..8c8437a43
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(adev);
+}
+
+/* ATC is defeatured on Sienna_Cichlid */
+static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uint64_t)wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int hqd_dump_v10_3(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
+ void *mqd)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int wave_control_execute_v10_3(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ /* SDMA is on gfxhub as well for Navi1* series */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v10_3,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
+ .init_interrupts = init_interrupts_v10_3,
+ .hqd_load = hqd_load_v10_3,
+ .hiq_mqd_load = hiq_mqd_load_v10_3,
+ .hqd_sdma_load = hqd_sdma_load_v10_3,
+ .hqd_dump = hqd_dump_v10_3,
+ .hqd_sdma_dump = hqd_sdma_dump_v10_3,
+ .hqd_is_occupied = hqd_is_occupied_v10_3,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
+ .hqd_destroy = hqd_destroy_v10_3,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
+ .wave_control_execute = wave_control_execute_v10_3,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+ .program_trap_handler_settings = program_trap_handler_settings_v10_3,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
new file mode 100644
index 000000000..d67d003ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -0,0 +1,812 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "soc15d.h"
+#include "v11_structs.h"
+#include "soc21.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v11_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst)
+{
+ struct v11_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v11_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int hqd_dump_v11(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (7+11+1+12+12)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA0_QUEUE0_RB_CNTL;
+ reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
+ reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
+ reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
+ reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
+ reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v11_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+ temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
+ SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int wave_control_execute_v11(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* SDMA is on gfxhub as well for gfx11 adapters */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
+ *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v11,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
+ .init_interrupts = init_interrupts_v11,
+ .hqd_load = hqd_load_v11,
+ .hiq_mqd_load = hiq_mqd_load_v11,
+ .hqd_sdma_load = hqd_sdma_load_v11,
+ .hqd_dump = hqd_dump_v11,
+ .hqd_sdma_dump = hqd_sdma_dump_v11,
+ .hqd_is_occupied = hqd_is_occupied_v11,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
+ .hqd_destroy = hqd_destroy_v11,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v11,
+ .wave_control_execute = wave_control_execute_v11,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+ .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v11_set_address_watch,
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
new file mode 100644
index 000000000..6bf448ab3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "cikd.h"
+#include "cik_sdma.h"
+#include "gfx_v7_0.h"
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+#include "cik_structs.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+enum {
+ MAX_TRAPID = 8, /* 3 bits in the bitfield. */
+ MAX_WATCH_ADDRESSES = 4
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished and the
+ * SW cleared it. So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
+{
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
+
+ return retval;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct cik_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
+ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* read_user_ptr may take the mm->mmap_lock.
+ * release srbm_mutex to avoid circular dependency between
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
+ */
+ release_queue(adev);
+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+ acquire_queue(adev, pipe_id, queue_id);
+ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct cik_sdma_rlc_registers *m;
+ unsigned long end_jiffies;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t data;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdma_rlc_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdma_rlc_rb_rptr);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdma_rlc_virtual_addr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdma_rlc_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdma_rlc_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdma_rlc_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t temp;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static void set_scratch_backing_va(struct amdgpu_device *adev,
+ uint64_t va, uint32_t vmid)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(adev);
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
+}
+
+ /**
+ * read_vmid_from_vmfault_reg - read vmid from register
+ *
+ * adev: amdgpu_device pointer
+ * @vmid: vmid pointer
+ * read vmid from register (CIK).
+ */
+static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
+{
+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
+
+const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
new file mode 100644
index 000000000..cd06e4a6d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gfx_v8_0.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "vi_structs.h"
+#include "vid.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
+{
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
+
+ return retval;
+}
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+ return (struct vi_mqd *)mqd;
+}
+
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct vi_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct vi_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
+ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+ * This is safe since EOP RPTR==WPTR for any inactive HQD
+ * on ASICs that do not support context-save.
+ * EOP writes/reads can start anywhere in the ring.
+ */
+ if (adev->asic_type != CHIP_TONGA) {
+ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+ }
+
+ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* read_user_ptr may take the mm->mmap_lock.
+ * release srbm_mutex to avoid circular dependency between
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
+ */
+ release_queue(adev);
+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+ acquire_queue(adev, pipe_id, queue_id);
+ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct vi_sdma_mqd *m;
+ unsigned long end_jiffies;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t data;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdmax_rlcx_virtual_addr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct vi_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t temp;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+ struct vi_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct vi_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_scratch_backing_va(struct amdgpu_device *adev,
+ uint64_t va, uint32_t vmid)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(adev);
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
+}
+
+const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000..3c45a188b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1177 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v9_0.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
+}
+
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
+{
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
+}
+
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
+{
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
+
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
+
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm,
+ uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL),
+ data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
+ struct v9_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return r;
+}
+
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ kgd_gfx_v9_release_queue(adev, inst);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ kgd_gfx_v9_release_queue(adev, inst);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ kgd_gfx_v9_release_queue(adev, inst);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~96 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because GFX9.4.1 cannot support multi-process debugging due to trap
+ * configuration and masking being limited to global scope. Always assume
+ * single process conditions.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall)
+{
+ int i;
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+ stall ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+ mutex_lock(&adev->srbm_mutex);
+ mutex_lock(&adev->grbm_idx_mutex);
+
+}
+
+static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+ mutex_unlock(&adev->grbm_idx_mutex);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/**
+ * get_wave_count: Read device registers to get number of waves in flight for
+ * a particular queue. The method also returns the VMID associated with the
+ * queue.
+ *
+ * @adev: Handle of device whose registers are to be read
+ * @queue_idx: Index of queue in the queue-map bit-field
+ * @wave_cnt: Output parameter updated with number of waves in flight
+ * @vmid: Output parameter updated with VMID of queue whose wave count
+ * is being collected
+ * @inst: xcc's instance number on a multi-XCC setup
+ */
+static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
+ int *wave_cnt, int *vmid, uint32_t inst)
+{
+ int pipe_idx;
+ int queue_slot;
+ unsigned int reg_val;
+
+ /*
+ * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
+ * parameters to read out waves in flight. Get VMID if there are
+ * non-zero waves in flight.
+ */
+ *vmid = 0xFF;
+ *wave_cnt = 0;
+ pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
+ queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
+ queue_slot);
+ *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (*wave_cnt != 0)
+ *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
+ CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+}
+
+/**
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * shader engine and aggregates the number of waves that are in flight for the
+ * process whose pasid is provided as a parameter. The process could have ZERO
+ * or more queues running and submitting waves to compute units.
+ *
+ * @adev: Handle of device from which to get number of waves in flight
+ * @pasid: Identifies the process for which this query call is invoked
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
+ * belong to process with given pasid
+ * @max_waves_per_cu: Output parameter updated with maximum number of waves
+ * possible per Compute Unit
+ * @inst: xcc's instance number on a multi-XCC setup
+ *
+ * Note: It's possible that the device has too many queues (oversubscription)
+ * in which case a VMID could be remapped to a different PASID. This could lead
+ * to an inaccurate wave count. Following is a high-level sequence:
+ * Time T1: vmid = getVmid(); vmid is associated with Pasid P1
+ * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2
+ * In the sequence above wave count obtained from time T1 will be incorrectly
+ * lost or added to total wave count.
+ *
+ * The registers that provide the waves in flight are:
+ *
+ * SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a
+ * queue is slotted, OFF if there is no queue. A process could have ZERO or
+ * more queues slotted and submitting waves to be run on compute units. Even
+ * when there is a queue it is possible there could be zero wave fronts, this
+ * can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem
+ * command
+ *
+ * For each bit that is ON from above:
+ *
+ * Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
+ * number of waves that are in flight for the queue at specified index. The
+ * index ranges from 0 to 7.
+ *
+ * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
+ * of the wave(s).
+ *
+ * Determine if VMID from above step maps to pasid provided as parameter. If
+ * it matches agrregate the wave count. That the VMID will not match pasid is
+ * a normal condition i.e. a device is expected to support multiple queues
+ * from multiple proceses.
+ *
+ * Reading registers referenced above involves programming GRBM appropriately
+ */
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
+ int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
+{
+ int qidx;
+ int vmid;
+ int se_idx;
+ int sh_idx;
+ int se_cnt;
+ int sh_cnt;
+ int wave_cnt;
+ int queue_map;
+ int pasid_tmp;
+ int max_queue_cnt;
+ int vmid_wave_cnt = 0;
+ DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+
+ lock_spi_csq_mutexes(adev);
+ soc15_grbm_select(adev, 1, 0, 0, 0, inst);
+
+ /*
+ * Iterate through the shader engines and arrays of the device
+ * to get number of waves in flight
+ */
+ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
+ KGD_MAX_QUEUES);
+ max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+ sh_cnt = adev->gfx.config.max_sh_per_se;
+ se_cnt = adev->gfx.config.max_shader_engines;
+ for (se_idx = 0; se_idx < se_cnt; se_idx++) {
+ for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+
+ amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: queue map encodes following schema: four
+ * pipes per each micro-engine, with each pipe mapping
+ * eight queues. This schema is true for GFX9 devices
+ * and must be verified for newer device families
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+
+ /* Skip qeueus that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
+
+ if (!(queue_map & (1 << qidx)))
+ continue;
+
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &wave_cnt, &vmid,
+ inst);
+ if (wave_cnt != 0) {
+ pasid_tmp =
+ RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
+ mmIH_VMID_0_LUT) + vmid);
+ if (pasid_tmp == pasid)
+ vmid_wave_cnt += wave_cnt;
+ }
+ }
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, inst);
+ unlock_spi_csq_mutexes(adev);
+
+ /* Update the output parameters and return */
+ *pasid_wave_cnt = vmid_wave_cnt;
+ *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
+ adev->gfx.cu_info.max_waves_per_simd;
+}
+
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ /*
+ * The CP cannot handle a 0 grace period input and will result in
+ * an infinite grace period being set so set to 1 to prevent this.
+ */
+ if (grace_period == 0)
+ grace_period = 1;
+
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ grace_period);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
+ lower_32_bits(tba_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
+ upper_32_bits(tba_addr >> 8));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
+ lower_32_bits(tma_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
+ upper_32_bits(tma_addr >> 8));
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
new file mode 100644
index 000000000..ce424615f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst);
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst);
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst);
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst);
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid);
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
+ int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst);
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id);
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall);
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
new file mode 100644
index 000000000..e03601113
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -0,0 +1,3026 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/dma-buf.h>
+#include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/drm_exec.h>
+
+#include "amdgpu_object.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
+/*
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
+
+/* Impose limit on how much memory KFD can use */
+static struct {
+ uint64_t max_system_mem_limit;
+ uint64_t max_ttm_mem_limit;
+ int64_t system_mem_used;
+ int64_t ttm_mem_used;
+ spinlock_t mem_limit_lock;
+} kfd_mem_limit;
+
+static const char * const domain_bit_to_string[] = {
+ "CPU",
+ "GTT",
+ "VRAM",
+ "GDS",
+ "GWS",
+ "OA"
+};
+
+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
+
+static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
+ struct kgd_mem *mem)
+{
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list)
+ if (entry->bo_va->base.vm == avm)
+ return true;
+
+ return false;
+}
+
+/**
+ * reuse_dmamap() - Check whether adev can share the original
+ * userptr BO
+ *
+ * If both adev and bo_adev are in direct mapping or
+ * in the same iommu group, they can share the original BO.
+ *
+ * @adev: Device to which can or cannot share the original BO
+ * @bo_adev: Device to which allocated BO belongs to
+ *
+ * Return: returns true if adev can share original userptr BO,
+ * false otherwise.
+ */
+static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
+{
+ return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
+ (adev->dev->iommu_group == bo_adev->dev->iommu_group);
+}
+
+/* Set memory usage limits. Current, limits are
+ * System (TTM + userptr) memory - 15/16th System RAM
+ * TTM memory - 3/8th System RAM
+ */
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+ struct sysinfo si;
+ uint64_t mem;
+
+ if (kfd_mem_limit.max_system_mem_limit)
+ return;
+
+ si_meminfo(&si);
+ mem = si.freeram - si.freehigh;
+ mem *= si.mem_unit;
+
+ spin_lock_init(&kfd_mem_limit.mem_limit_lock);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+}
+
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+ kfd_mem_limit.system_mem_used += size;
+}
+
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
+
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer.
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by B0. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is
+ * managed as one compute node in driver for app
+ *
+ * Return:
+ * returns -ENOMEM in case of error, ZERO otherwise
+ */
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ size_t system_mem_needed, ttm_mem_needed, vram_needed;
+ int ret = 0;
+ uint64_t vram_size = 0;
+
+ system_mem_needed = 0;
+ ttm_mem_needed = 0;
+ vram_needed = 0;
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
+ vram_needed = size;
+ /*
+ * For GFX 9.4.3, get the VRAM size from XCP structs
+ */
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ return -EINVAL;
+
+ vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+ if (adev->gmc.is_app_apu) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ system_mem_needed = size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ return -ENOMEM;
+ }
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit)
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
+ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) ||
+ (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
+ vram_size - reserved_for_pt)) {
+ ret = -ENOMEM;
+ goto release;
+ }
+
+ /* Update memory accounting by decreasing available system
+ * memory, TTM memory and GPU memory as computed above
+ */
+ WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+ if (adev && xcp_id >= 0) {
+ adev->kfd.vram_used[xcp_id] += vram_needed;
+ adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+ vram_needed :
+ ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ return ret;
+}
+
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
+{
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is set");
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ goto release;
+
+ if (adev) {
+ adev->kfd.vram_used[xcp_id] -= size;
+ if (adev->gmc.is_app_apu) {
+ adev->kfd.vram_used_aligned[xcp_id] -= size;
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else {
+ adev->kfd.vram_used_aligned[xcp_id] -=
+ ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ kfd_mem_limit.system_mem_used -= size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ goto release;
+ }
+ WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+ "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+ "KFD TTM memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "KFD system memory accounting unbalanced");
+
+release:
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ u32 alloc_flags = bo->kfd_bo->alloc_flags;
+ u64 size = amdgpu_bo_size(bo);
+
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+ bo->xcp_id);
+
+ kfree(bo->kfd_bo);
+}
+
+/**
+ * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information
+ * about USERPTR or DOOREBELL or MMIO BO.
+ *
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ * in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+ struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+ struct drm_gem_object *gem_obj;
+ int ret;
+ uint64_t flags = 0;
+
+ ret = amdgpu_bo_reserve(mem->bo, false);
+ if (ret)
+ return ret;
+
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
+ flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+
+ ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
+ AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
+
+ amdgpu_bo_unreserve(mem->bo);
+
+ if (ret) {
+ pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+ return -EINVAL;
+ }
+
+ *bo_out = gem_to_amdgpu_bo(gem_obj);
+ (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+ return ret;
+}
+
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
+ * reservation object.
+ *
+ * @bo: [IN] Remove eviction fence(s) from this BO
+ * @ef: [IN] This eviction fence is removed if it
+ * is present in the shared list.
+ *
+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
+ */
+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
+ struct amdgpu_amdkfd_fence *ef)
+{
+ struct dma_fence *replacement;
+
+ if (!ef)
+ return -EINVAL;
+
+ /* TODO: Instead of block before we should use the fence of the page
+ * table update and TLB flush here directly.
+ */
+ replacement = dma_fence_get_stub();
+ dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+ replacement, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(replacement);
+ return 0;
+}
+
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+ struct amdgpu_bo *root = bo;
+ struct amdgpu_vm_bo_base *vm_bo;
+ struct amdgpu_vm *vm;
+ struct amdkfd_process_info *info;
+ struct amdgpu_amdkfd_fence *ef;
+ int ret;
+
+ /* we can always get vm_bo from root PD bo.*/
+ while (root->parent)
+ root = root->parent;
+
+ vm_bo = root->vm_bo;
+ if (!vm_bo)
+ return 0;
+
+ vm = vm_bo->vm;
+ if (!vm)
+ return 0;
+
+ info = vm->process_info;
+ if (!info || !info->eviction_fence)
+ return 0;
+
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
+ struct amdgpu_amdkfd_fence, base);
+
+ BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+ dma_resv_unlock(bo->tbo.base.resv);
+
+ dma_fence_put(&ef->base);
+ return ret;
+}
+
+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+ bool wait)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ int ret;
+
+ if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
+ "Called with userptr BO"))
+ return -EINVAL;
+
+ amdgpu_bo_placement_from_domain(bo, domain);
+
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto validate_fail;
+ if (wait)
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+
+validate_fail:
+ return ret;
+}
+
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+{
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
+}
+
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Page directories are not updated here because huge page handling
+ * during page table updates can invalidate page directory entries
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+{
+ struct amdgpu_bo *pd = vm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ int ret;
+
+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
+ if (ret) {
+ pr_err("failed to validate PT BOs\n");
+ return ret;
+ }
+
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+
+ return 0;
+}
+
+static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo *pd = vm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ int ret;
+
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ return ret;
+
+ return amdgpu_sync_fence(sync, vm->last_update);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_MTYPE_DEFAULT;
+
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
+}
+
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg_dma_address(sg->sgl) = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
+}
+
+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ int ret;
+
+ if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+ return -EINVAL;
+
+ ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+ if (unlikely(!ttm->sg))
+ return -ENOMEM;
+
+ /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+ ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+ ttm->num_pages, 0,
+ (u64)ttm->num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (unlikely(ret))
+ goto free_sg;
+
+ ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (unlikely(ret))
+ goto release_sg;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unmap_sg;
+
+ return 0;
+
+unmap_sg:
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+release_sg:
+ pr_err("DMA map userptr failed: %d\n", ret);
+ sg_free_table(ttm->sg);
+free_sg:
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ int ret;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ * in updating requesting device's page table
+ * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ * accessible. This allows an update of requesting device's page table
+ * with entries associated with DOOREBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Mapping of DOORBELL or MMIO BO of same or peer device
+ * - Validating an evicted DOOREBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+ dma_addr_t dma_addr;
+ bool mmio;
+ int ret;
+
+ /* Expect SG Table of dmapmap BO to be NULL */
+ mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+ if (unlikely(ttm->sg)) {
+ pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+ return -EINVAL;
+ }
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+ pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+ pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+ dma_addr = dma_map_resource(adev->dev, dma_addr,
+ mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(adev->dev, dma_addr);
+ if (unlikely(ret))
+ return ret;
+ pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+ ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+ if (unlikely(!ttm->sg)) {
+ ret = -ENOMEM;
+ goto unmap_sg;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret))
+ goto free_sg;
+
+ return ret;
+
+free_sg:
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+unmap_sg:
+ dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+ dir, DMA_ATTR_SKIP_CPU_SYNC);
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ return 0;
+ case KFD_MEM_ATT_USERPTR:
+ return kfd_mem_dmamap_userptr(mem, attachment);
+ case KFD_MEM_ATT_DMABUF:
+ return kfd_mem_dmamap_dmabuf(attachment);
+ case KFD_MEM_ATT_SG:
+ return kfd_mem_dmamap_sg_bo(mem, attachment);
+ default:
+ WARN_ON_ONCE(1);
+ }
+ return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = false};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+
+ if (unlikely(!ttm->sg))
+ return;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ /* This is a no-op. We don't want to trigger eviction fences when
+ * unmapping DMABufs. Therefore the invalidation (moving to system
+ * domain) is done in kfd_mem_dmamap_dmabuf.
+ */
+}
+
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ * - Free SG Table that is used to encapsulate DMA mapped memory of
+ * peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ * Eviction of DOOREBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+
+ if (unlikely(!ttm->sg)) {
+ pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+ return;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+ ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ bo->tbo.sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ break;
+ case KFD_MEM_ATT_USERPTR:
+ kfd_mem_dmaunmap_userptr(mem, attachment);
+ break;
+ case KFD_MEM_ATT_DMABUF:
+ kfd_mem_dmaunmap_dmabuf(attachment);
+ break;
+ case KFD_MEM_ATT_SG:
+ kfd_mem_dmaunmap_sg_bo(mem, attachment);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
+{
+ if (!mem->dmabuf) {
+ struct dma_buf *ret = amdgpu_gem_prime_export(
+ &mem->bo->tbo.base,
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DRM_RDWR : 0);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+ mem->dmabuf = ret;
+ }
+
+ return 0;
+}
+
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_bo **bo)
+{
+ struct drm_gem_object *gobj;
+ int ret;
+
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ return ret;
+
+ gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+ if (IS_ERR(gobj))
+ return PTR_ERR(gobj);
+
+ *bo = gem_to_amdgpu_bo(gobj);
+ (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ return 0;
+}
+
+/* kfd_mem_attach - Add a BO to a VM
+ *
+ * Everything that needs to bo done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 0. Create BO for DMA mapping, if needed
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a. Validate new page tables and directories
+ */
+static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_vm *vm, bool is_aql)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ uint64_t va = mem->va;
+ struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+ struct amdgpu_bo *bo[2] = {NULL, NULL};
+ bool same_hive = false;
+ int i, ret;
+
+ if (!va) {
+ pr_err("Invalid VA when adding BO to VM\n");
+ return -EINVAL;
+ }
+
+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+ *
+ * The access path of MMIO and DOORBELL BOs of is always over PCIe.
+ * In contrast the access path of VRAM BOs depens upon the type of
+ * link that connects the peer device. Access over PCIe is allowed
+ * if peer device has large BAR. In contrast, access over xGMI is
+ * allowed for both small and large BAR configurations of peer device
+ */
+ if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
+ ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+ same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+ if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+ return -EINVAL;
+ }
+
+ for (i = 0; i <= is_aql; i++) {
+ attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+ if (unlikely(!attachment[i])) {
+ ret = -ENOMEM;
+ goto unwind;
+ }
+
+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+ va + bo_size, vm);
+
+ if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+ (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
+ /* Mappings on the local GPU, or VRAM mappings in the
+ * local hive, or userptr mapping can reuse dma map
+ * address space share the original BO
+ */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = mem->bo;
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (i > 0) {
+ /* Multiple mappings on the same GPU share the BO */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = bo[0];
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ /* Create an SG BO to DMA-map userptrs on other GPUs */
+ attachment[i]->type = KFD_MEM_ATT_USERPTR;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+ } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+ WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+ "Handing invalid SG BO in ATTACH request");
+ attachment[i]->type = KFD_MEM_ATT_SG;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Enable acces to GTT and VRAM BOs of peer devices */
+ } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+ mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ attachment[i]->type = KFD_MEM_ATT_DMABUF;
+ ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
+ } else {
+ WARN_ONCE(true, "Handling invalid ATTACH request");
+ ret = -EINVAL;
+ goto unwind;
+ }
+
+ /* Add BO to VM internal data structures */
+ ret = amdgpu_bo_reserve(bo[i], false);
+ if (ret) {
+ pr_debug("Unable to reserve BO during memory attach");
+ goto unwind;
+ }
+ attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ amdgpu_bo_unreserve(bo[i]);
+ if (unlikely(!attachment[i]->bo_va)) {
+ ret = -ENOMEM;
+ pr_err("Failed to add BO object to VM. ret == %d\n",
+ ret);
+ goto unwind;
+ }
+ attachment[i]->va = va;
+ attachment[i]->pte_flags = get_pte_flags(adev, mem);
+ attachment[i]->adev = adev;
+ list_add(&attachment[i]->list, &mem->attachments);
+
+ va += bo_size;
+ }
+
+ return 0;
+
+unwind:
+ for (; i >= 0; i--) {
+ if (!attachment[i])
+ continue;
+ if (attachment[i]->bo_va) {
+ amdgpu_bo_reserve(bo[i], true);
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ amdgpu_bo_unreserve(bo[i]);
+ list_del(&attachment[i]->list);
+ }
+ if (bo[i])
+ drm_gem_object_put(&bo[i]->tbo.base);
+ kfree(attachment[i]);
+ }
+ return ret;
+}
+
+static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
+{
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+ pr_debug("\t remove VA 0x%llx in entry %p\n",
+ attachment->va, attachment);
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ drm_gem_object_put(&bo->tbo.base);
+ list_del(&attachment->list);
+ kfree(attachment);
+}
+
+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info,
+ bool userptr)
+{
+ mutex_lock(&process_info->lock);
+ if (userptr)
+ list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ else
+ list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
+ mutex_unlock(&process_info->lock);
+}
+
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ mutex_lock(&process_info->lock);
+ list_del(&mem->validate_list);
+ mutex_unlock(&process_info->lock);
+}
+
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct hmm_range *range;
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_hmm_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * will be validated in the restore_userptr_work worker at a
+ * later stage when it is scheduled by another ioctl called by
+ * CRIU master process for the target pid for restore.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+ if (ret) {
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto unregister_out;
+ }
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+unregister_out:
+ if (ret)
+ amdgpu_hmm_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
+/* Reserving a BO and its page table BOs must happen atomically to
+ * avoid deadlocks. Some operations update multiple VMs at once. Track
+ * all the reservation info in a context structure. Optionally a sync
+ * object can track VM updates.
+ */
+struct bo_vm_reservation_context {
+ /* DRM execution context for the reservation */
+ struct drm_exec exec;
+ /* Number of VMs reserved */
+ unsigned int n_vms;
+ /* Pointer to sync object */
+ struct amdgpu_sync *sync;
+};
+
+enum bo_vm_match {
+ BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
+ BO_VM_MAPPED, /* Match VMs where a BO is mapped */
+ BO_VM_ALL, /* Match all VMs a BO was added to */
+};
+
+/**
+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ */
+static int reserve_bo_and_vm(struct kgd_mem *mem,
+ struct amdgpu_vm *vm,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct amdgpu_bo *bo = mem->bo;
+ int ret;
+
+ WARN_ON(!vm);
+
+ ctx->n_vms = 1;
+ ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ }
+ return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
+}
+
+/**
+ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
+ * is used. Otherwise, a single VM associated with the BO.
+ * @map_type: the mapping status that will be used to filter the VMs.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ *
+ * Returns 0 for success, negative for failure.
+ */
+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+ struct amdgpu_vm *vm, enum bo_vm_match map_type,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_bo *bo = mem->bo;
+ int ret;
+
+ ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ctx->n_vms = 0;
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type
+ && map_type != BO_VM_ALL))
+ continue;
+
+ ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+ &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ ++ctx->n_vms;
+ }
+
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ }
+ return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
+}
+
+/**
+ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
+ * @ctx: Reservation context to unreserve
+ * @wait: Optionally wait for a sync object representing pending VM updates
+ * @intr: Whether the wait is interruptible
+ *
+ * Also frees any resources allocated in
+ * reserve_bo_and_(cond_)vm(s). Returns the status from
+ * amdgpu_sync_wait.
+ */
+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+ bool wait, bool intr)
+{
+ int ret = 0;
+
+ if (wait)
+ ret = amdgpu_sync_wait(ctx->sync, intr);
+
+ drm_exec_fini(&ctx->exec);
+ ctx->sync = NULL;
+ return ret;
+}
+
+static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+
+ amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+
+ amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+}
+
+static int update_gpuvm_pte(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
+ int ret;
+
+ ret = kfd_mem_dmamap_attachment(mem, entry);
+ if (ret)
+ return ret;
+
+ /* Update the page tables */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret) {
+ pr_err("amdgpu_vm_bo_update failed\n");
+ return ret;
+ }
+
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+}
+
+static int map_bo_to_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync,
+ bool no_update_pte)
+{
+ int ret;
+
+ /* Set virtual address for the allocation */
+ ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
+ amdgpu_bo_size(entry->bo_va->base.bo),
+ entry->pte_flags);
+ if (ret) {
+ pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+ entry->va, ret);
+ return ret;
+ }
+
+ if (no_update_pte)
+ return 0;
+
+ ret = update_gpuvm_pte(mem, entry, sync);
+ if (ret) {
+ pr_err("update_gpuvm_pte() failed\n");
+ goto update_gpuvm_pte_failed;
+ }
+
+ return 0;
+
+update_gpuvm_pte_failed:
+ unmap_bo_from_gpuvm(mem, entry, sync);
+ return ret;
+}
+
+static int process_validate_vms(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_validate_pt_pd_bos(peer_vm);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *pd = peer_vm->root.bo;
+
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
+ AMDGPU_SYNC_NE_OWNER,
+ AMDGPU_FENCE_OWNER_KFD);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int process_update_pds(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_update_pds(peer_vm, sync);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdkfd_process_info *info = NULL;
+ int ret;
+
+ if (!*process_info) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ mutex_init(&info->lock);
+ mutex_init(&info->notifier_lock);
+ INIT_LIST_HEAD(&info->vm_list_head);
+ INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
+
+ info->eviction_fence =
+ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+ current->mm,
+ NULL);
+ if (!info->eviction_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto create_evict_fence_fail;
+ }
+
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
+ *process_info = info;
+ *ef = dma_fence_get(&info->eviction_fence->base);
+ }
+
+ vm->process_info = *process_info;
+
+ /* Validate page directory and attach eviction fence */
+ ret = amdgpu_bo_reserve(vm->root.bo, true);
+ if (ret)
+ goto reserve_pd_fail;
+ ret = vm_validate_pt_pd_bos(vm);
+ if (ret) {
+ pr_err("validate_pt_pd_bos() failed\n");
+ goto validate_pd_fail;
+ }
+ ret = amdgpu_bo_sync_wait(vm->root.bo,
+ AMDGPU_FENCE_OWNER_KFD, false);
+ if (ret)
+ goto wait_pd_fail;
+ ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv,
+ &vm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ /* Update process info */
+ mutex_lock(&vm->process_info->lock);
+ list_add_tail(&vm->vm_list_node,
+ &(vm->process_info->vm_list_head));
+ vm->process_info->n_vms++;
+ mutex_unlock(&vm->process_info->lock);
+
+ return 0;
+
+reserve_shared_fail:
+wait_pd_fail:
+validate_pd_fail:
+ amdgpu_bo_unreserve(vm->root.bo);
+reserve_pd_fail:
+ vm->process_info = NULL;
+ if (info) {
+ /* Two fence references: one in info and one in *ef */
+ dma_fence_put(&info->eviction_fence->base);
+ dma_fence_put(*ef);
+ *ef = NULL;
+ *process_info = NULL;
+ put_pid(info->pid);
+create_evict_fence_fail:
+ mutex_destroy(&info->lock);
+ mutex_destroy(&info->notifier_lock);
+ kfree(info);
+ }
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ * - USERPTR BOs are UNPINNABLE and will return error
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return ret;
+
+ ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+ if (ret)
+ pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ * - Is a illegal request for USERPTR BOs and is ignored
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return;
+
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm, u32 pasid)
+
+{
+ int ret;
+
+ /* Free the original amdgpu allocated pasid,
+ * will be replaced with kfd allocated pasid.
+ */
+ if (avm->pasid) {
+ amdgpu_pasid_free(avm->pasid);
+ amdgpu_vm_set_pasid(adev, avm, 0);
+ }
+
+ ret = amdgpu_vm_set_pasid(adev, avm, pasid);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
+ struct dma_fence **ef)
+{
+ int ret;
+
+ /* Already a compute VM? */
+ if (avm->process_info)
+ return -EINVAL;
+
+ /* Convert VM into a compute VM */
+ ret = amdgpu_vm_make_compute(adev, avm);
+ if (ret)
+ return ret;
+
+ /* Initialize KFD part of the VM and process info */
+ ret = init_kfd_vm(avm, process_info, ef);
+ if (ret)
+ return ret;
+
+ amdgpu_vm_set_task_info(avm);
+
+ return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct amdkfd_process_info *process_info = vm->process_info;
+
+ if (!process_info)
+ return;
+
+ /* Update process info */
+ mutex_lock(&process_info->lock);
+ process_info->n_vms--;
+ list_del(&vm->vm_list_node);
+ mutex_unlock(&process_info->lock);
+
+ vm->process_info = NULL;
+
+ /* Release per-process resources when last compute VM is destroyed */
+ if (!process_info->n_vms) {
+ WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
+
+ dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
+ mutex_destroy(&process_info->lock);
+ mutex_destroy(&process_info->notifier_lock);
+ kfree(process_info);
+ }
+}
+
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
+ void *drm_priv)
+{
+ struct amdgpu_vm *avm;
+
+ if (WARN_ON(!adev || !drm_priv))
+ return;
+
+ avm = drm_priv_to_vm(drm_priv);
+
+ pr_debug("Releasing process vm %p\n", avm);
+
+ /* The original pasid of amdgpu vm has already been
+ * released during making a amdgpu vm to a compute vm
+ * The current pasid is managed by kfd and will be
+ * released on kfd process destroy. Set amdgpu pasid
+ * to 0 to avoid duplicate release.
+ */
+ amdgpu_vm_release_compute(adev, avm);
+}
+
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_bo *pd = avm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+
+ if (adev->asic_type < CHIP_VEGA10)
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ return avm->pd_phys_addr;
+}
+
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+ mutex_unlock(&pinfo->lock);
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ pr_debug("scheduling work\n");
+ mutex_lock(&pinfo->notifier_lock);
+ pinfo->evicted_bos++;
+ mutex_unlock(&pinfo->notifier_lock);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
+}
+
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ ssize_t available;
+ uint64_t vram_available, system_mem_available, ttm_mem_available;
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt;
+
+ if (adev->gmc.is_app_apu) {
+ system_mem_available = no_system_mem_limit ?
+ kfd_mem_limit.max_system_mem_limit :
+ kfd_mem_limit.max_system_mem_limit -
+ kfd_mem_limit.system_mem_used;
+
+ ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+ kfd_mem_limit.ttm_mem_used;
+
+ available = min3(system_mem_available, ttm_mem_available,
+ vram_available);
+ available = ALIGN_DOWN(available, PAGE_SIZE);
+ } else {
+ available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+ }
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ if (available < 0)
+ available = 0;
+
+ return available;
+}
+
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
+ struct sg_table *sg = NULL;
+ uint64_t user_addr = 0;
+ struct amdgpu_bo *bo;
+ struct drm_gem_object *gobj = NULL;
+ u32 domain, alloc_domain;
+ uint64_t aligned_size;
+ int8_t xcp_id = -1;
+ u64 alloc_flags;
+ int ret;
+
+ /*
+ * Check on which domain to allocate BO
+ */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ if (adev->gmc.is_app_apu) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+ }
+ xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+ 0 : fpriv->xcp_id;
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+ alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = untagged_addr(*offset);
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ bo_type = ttm_bo_type_sg;
+ if (size > UINT_MAX)
+ return -EINVAL;
+ sg = create_sg_table(*offset, size);
+ if (!sg)
+ return -ENOMEM;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+ alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ mutex_init(&(*mem)->lock);
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+
+ /* Workaround for AQL queue wraparound bug. Map the same
+ * memory twice. That means we only actually allocate half
+ * the memory.
+ */
+ if ((*mem)->aql_queue)
+ size >>= 1;
+ aligned_size = PAGE_ALIGN(size);
+
+ (*mem)->alloc_flags = flags;
+
+ amdgpu_sync_create(&(*mem)->sync);
+
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+ xcp_id);
+ if (ret) {
+ pr_debug("Insufficient memory\n");
+ goto err_reserve_limit;
+ }
+
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+ va, (*mem)->aql_queue ? size << 1 : size,
+ domain_string(alloc_domain), xcp_id);
+
+ ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
+ bo_type, NULL, &gobj, xcp_id + 1);
+ if (ret) {
+ pr_debug("Failed to create BO on domain %s. ret %d\n",
+ domain_string(alloc_domain), ret);
+ goto err_bo_create;
+ }
+ ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+ if (ret) {
+ pr_debug("Failed to allow vma node access. ret %d\n", ret);
+ goto err_node_allow;
+ }
+ bo = gem_to_amdgpu_bo(gobj);
+ if (bo_type == ttm_bo_type_sg) {
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ }
+ bo->kfd_bo = *mem;
+ (*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
+
+ (*mem)->va = va;
+ (*mem)->domain = domain;
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
+ if (ret)
+ goto allocate_init_user_pages_failed;
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+ goto err_pin_bo;
+ }
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ }
+
+ if (offset)
+ *offset = amdgpu_bo_mmap_offset(bo);
+
+ return 0;
+
+allocate_init_user_pages_failed:
+err_pin_bo:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_limit;
+err_bo_create:
+ amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
+err_reserve_limit:
+ mutex_destroy(&(*mem)->lock);
+ if (gobj)
+ drm_gem_object_put(gobj);
+ else
+ kfree(*mem);
+err:
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ bool use_release_notifier = (mem->bo->kfd_bo == mem);
+ struct kfd_mem_attachment *entry, *tmp;
+ struct bo_vm_reservation_context ctx;
+ unsigned int mapped_to_gpu_memory;
+ int ret;
+ bool is_imported = false;
+
+ mutex_lock(&mem->lock);
+
+ /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
+ if (mem->alloc_flags &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+ }
+
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
+ is_imported = mem->is_imported;
+ mutex_unlock(&mem->lock);
+ /* lock is not needed after this, since mem is unused and will
+ * be freed anyway
+ */
+
+ if (mapped_to_gpu_memory > 0) {
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+ mem->va, bo_size);
+ return -EBUSY;
+ }
+
+ /* Make sure restore workers don't access the BO any more */
+ mutex_lock(&process_info->lock);
+ list_del(&mem->validate_list);
+ mutex_unlock(&process_info->lock);
+
+ /* Cleanup user pages and MMU notifiers */
+ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ amdgpu_hmm_unregister(mem->bo);
+ mutex_lock(&process_info->notifier_lock);
+ amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+ mutex_unlock(&process_info->notifier_lock);
+ }
+
+ ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+ if (unlikely(ret))
+ return ret;
+
+ /* The eviction fence should be removed by the last unmap.
+ * TODO: Log an error condition if the bo still has the eviction fence
+ * attached
+ */
+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+ process_info->eviction_fence);
+ pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue));
+
+ /* Remove from VM internal data structures */
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+ kfd_mem_detach(entry);
+
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ /* Free the sync object */
+ amdgpu_sync_free(&mem->sync);
+
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
+ * remap BO. We need to free it.
+ */
+ if (mem->bo->tbo.sg) {
+ sg_free_table(mem->bo->tbo.sg);
+ kfree(mem->bo->tbo.sg);
+ }
+
+ /* Update the size of the BO being freed if it was allocated from
+ * VRAM and is not imported. For APP APU VRAM allocations are done
+ * in GTT domain
+ */
+ if (size) {
+ if (!is_imported &&
+ (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
+ (adev->gmc.is_app_apu &&
+ mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
+ *size = bo_size;
+ else
+ *size = 0;
+ }
+
+ /* Free the BO*/
+ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+ if (mem->dmabuf)
+ dma_buf_put(mem->dmabuf);
+ mutex_destroy(&mem->lock);
+
+ /* If this releases the last reference, it will end up calling
+ * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+ * this needs to be the last call here.
+ */
+ drm_gem_object_put(&mem->bo->tbo.base);
+
+ /*
+ * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * explicitly free it here.
+ */
+ if (!use_release_notifier)
+ kfree(mem);
+
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem,
+ void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ int ret;
+ struct amdgpu_bo *bo;
+ uint32_t domain;
+ struct kfd_mem_attachment *entry;
+ struct bo_vm_reservation_context ctx;
+ unsigned long bo_size;
+ bool is_invalid_userptr = false;
+
+ bo = mem->bo;
+ if (!bo) {
+ pr_err("Invalid BO when mapping memory to GPU\n");
+ return -EINVAL;
+ }
+
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock notifier lock. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ mutex_lock(&mem->process_info->notifier_lock);
+ is_invalid_userptr = !!mem->invalid;
+ mutex_unlock(&mem->process_info->notifier_lock);
+ }
+
+ mutex_lock(&mem->lock);
+
+ domain = mem->domain;
+ bo_size = bo->tbo.base.size;
+
+ pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ avm, domain_string(domain));
+
+ if (!kfd_mem_is_attached(avm, mem)) {
+ ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
+ if (ret)
+ goto out;
+ }
+
+ ret = reserve_bo_and_vm(mem, avm, &ctx);
+ if (unlikely(ret))
+ goto out;
+
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+ bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
+ ret = vm_validate_pt_pd_bos(avm);
+ if (unlikely(ret))
+ goto out_unreserve;
+
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ /* Validate BO only once. The eviction fence gets added to BO
+ * the first time it is mapped. Validate will wait for all
+ * background evictions to complete.
+ */
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret) {
+ pr_debug("Validate failed\n");
+ goto out_unreserve;
+ }
+ }
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || entry->is_mapped)
+ continue;
+
+ pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+ entry->va, entry->va + bo_size, entry);
+
+ ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
+ is_invalid_userptr);
+ if (ret) {
+ pr_err("Failed to map bo to gpuvm\n");
+ goto out_unreserve;
+ }
+
+ ret = vm_update_pds(avm, ctx.sync);
+ if (ret) {
+ pr_err("Failed to update page directories\n");
+ goto out_unreserve;
+ }
+
+ entry->is_mapped = true;
+ mem->mapped_to_gpu_memory++;
+ pr_debug("\t INC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+
+ if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &avm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ goto out;
+
+out_unreserve:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->process_info->lock);
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdkfd_process_info *process_info = avm->process_info;
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ struct kfd_mem_attachment *entry;
+ struct bo_vm_reservation_context ctx;
+ int ret;
+
+ mutex_lock(&mem->lock);
+
+ ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
+ if (unlikely(ret))
+ goto out;
+ /* If no VMs were reserved, it means the BO wasn't actually mapped */
+ if (ctx.n_vms == 0) {
+ ret = -EINVAL;
+ goto unreserve_out;
+ }
+
+ ret = vm_validate_pt_pd_bos(avm);
+ if (unlikely(ret))
+ goto unreserve_out;
+
+ pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ avm);
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || !entry->is_mapped)
+ continue;
+
+ pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+ entry->va, entry->va + bo_size, entry);
+
+ unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ entry->is_mapped = false;
+
+ mem->mapped_to_gpu_memory--;
+ pr_debug("\t DEC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+
+ /* If BO is unmapped from all VMs, unfence it. It can be evicted if
+ * required.
+ */
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
+ !mem->bo->tbo.pin_count)
+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+ process_info->eviction_fence);
+
+unreserve_out:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
+{
+ struct amdgpu_sync sync;
+ int ret;
+
+ amdgpu_sync_create(&sync);
+
+ mutex_lock(&mem->lock);
+ amdgpu_sync_clone(&mem->sync, &sync);
+ mutex_unlock(&mem->lock);
+
+ ret = amdgpu_sync_wait(&sync, intr);
+ amdgpu_sync_free(&sync);
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->vm_bo->vm->process_info->eviction_fence);
+
+ amdgpu_bo_unreserve(bo);
+
+ bo = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size)
+{
+ int ret;
+ struct amdgpu_bo *bo = mem->bo;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ pr_err("userptr can't be mapped to kernel\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&mem->process_info->lock);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto bo_reserve_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto pin_failed;
+ }
+
+ ret = amdgpu_bo_kmap(bo, kptr);
+ if (ret) {
+ pr_err("Failed to map bo to kernel. ret %d\n", ret);
+ goto kmap_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, mem->process_info->eviction_fence);
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ amdgpu_bo_unreserve(bo);
+
+ mutex_unlock(&mem->process_info->lock);
+ return 0;
+
+kmap_failed:
+ amdgpu_bo_unpin(bo);
+pin_failed:
+ amdgpu_bo_unreserve(bo);
+bo_reserve_failed:
+ mutex_unlock(&mem->process_info->lock);
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
+{
+ struct amdgpu_bo *bo = mem->bo;
+
+ amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *mem)
+{
+ if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ *mem = *adev->gmc.vm_fault_info;
+ mb(); /* make sure read happened */
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ }
+ return 0;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT))) {
+ /* Only VRAM and GTT BOs are supported */
+ ret = -EINVAL;
+ goto err_put_obj;
+ }
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem) {
+ ret = -ENOMEM;
+ goto err_put_obj;
+ }
+
+ ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+ if (ret)
+ goto err_free_mem;
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ if (mmap_offset)
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ mutex_init(&(*mem)->lock);
+
+ (*mem)->alloc_flags =
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+
+ get_dma_buf(dma_buf);
+ (*mem)->dmabuf = dma_buf;
+ (*mem)->bo = bo;
+ (*mem)->va = va;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+ (*mem)->is_imported = true;
+
+ return 0;
+
+err_free_mem:
+ kfree(*mem);
+err_put_obj:
+ drm_gem_object_put(obj);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dma_buf)
+{
+ int ret;
+
+ mutex_lock(&mem->lock);
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ goto out;
+
+ get_dma_buf(mem->dmabuf);
+ *dma_buf = mem->dmabuf;
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int r = 0;
+
+ /* Do not process MMU notifications during CRIU restore until
+ * KFD_CRIU_OP_RESUME IOCTL is received
+ */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
+ mutex_lock(&process_info->notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mem->invalid++;
+ if (++process_info->evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd_quiesce_mm(mni->mm,
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+ if (r)
+ pr_err("Failed to quiesce KFD\n");
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+ mutex_unlock(&process_info->notifier_lock);
+
+ return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ uint32_t invalid;
+ int ret = 0;
+
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Move all invalidated BOs to the userptr_inval_list */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list)
+ if (mem->invalid)
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_inval_list);
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ invalid = mem->invalid;
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its page list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+ mem->range = NULL;
+
+ /* BO reservations and getting user pages (hmm_range_fault)
+ * must happen outside the notifier lock
+ */
+ mutex_unlock(&process_info->notifier_lock);
+
+ /* Move the BO to system (CPU) domain if necessary to unmap
+ * and free the SG table
+ */
+ if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+ }
+
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
+ &mem->range);
+ if (ret) {
+ pr_debug("Failed %d to get user pages\n", ret);
+
+ /* Return -EFAULT bad address error as success. It will
+ * fail later with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with stalled
+ * user mode queues.
+ *
+ * Return other error -EBUSY or -ENOMEM to retry restore
+ */
+ if (ret != -EFAULT)
+ return ret;
+
+ ret = 0;
+ }
+
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently.
+ */
+ if (mem->invalid != invalid) {
+ ret = -EAGAIN;
+ goto unlock_out;
+ }
+ /* set mem valid if mem has hmm range associated */
+ if (mem->range)
+ mem->invalid = 0;
+ }
+
+unlock_out:
+ mutex_unlock(&process_info->notifier_lock);
+
+ return ret;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_sync sync;
+ struct drm_exec exec;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ amdgpu_sync_create(&sync);
+
+ drm_exec_init(&exec, 0);
+ /* Reserve all BOs and page tables for validation */
+ drm_exec_until_all_locked(&exec) {
+ /* Reserve all the page directories */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+
+ /* Reserve the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+ }
+
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
+ bo = mem->bo;
+
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ drm_exec_fini(&exec);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+
+ return ret;
+}
+
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ int ret = 0;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ bool valid;
+
+ /* keep mem without hmm range at userptr_inval_list */
+ if (!mem->range)
+ continue;
+
+ /* Only check mem with hmm range associated */
+ valid = amdgpu_ttm_tt_get_user_pages_done(
+ mem->bo->tbo.ttm, mem->range);
+
+ mem->range = NULL;
+ if (!valid) {
+ WARN(!mem->invalid, "Invalid BO not marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ if (mem->invalid) {
+ WARN(1, "Valid BO is marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ }
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ uint32_t evicted_bos;
+
+ mutex_lock(&process_info->notifier_lock);
+ evicted_bos = process_info->evicted_bos;
+ mutex_unlock(&process_info->notifier_lock);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent evicton and atomic update. If
+ * another eviction happens after successful update, it will
+ * be a first eviction that calls quiesce_mm. The eviction
+ * reference counting inside KFD will handle this case.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ if (process_info->evicted_bos != evicted_bos)
+ goto unlock_notifier_out;
+
+ if (confirm_valid_user_pages_locked(process_info)) {
+ WARN(1, "User pages unexpectedly invalid");
+ goto unlock_notifier_out;
+ }
+
+ process_info->evicted_bos = evicted_bos = 0;
+
+ if (kgd2kfd_resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+
+unlock_notifier_out:
+ mutex_unlock(&process_info->notifier_lock);
+unlock_out:
+ mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos) {
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
+ }
+ mmput(mm);
+ put_task_struct(usertask);
+}
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ * KFD process identified by process_info
+ *
+ * @process_info: amdkfd_process_info of the KFD process
+ *
+ * After memory eviction, restore thread calls this function. The function
+ * should be called when the Process is still valid. BO restore involves -
+ *
+ * 1. Release old eviction fence and create new one
+ * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
+ * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ * BOs that need to be reserved.
+ * 4. Reserve all the BOs
+ * 5. Validate of PD and PT BOs.
+ * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence
+ * 7. Add fence to all PD and PT BOs.
+ * 8. Unreserve all BOs
+ */
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+{
+ struct amdkfd_process_info *process_info = info;
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem;
+ struct amdgpu_amdkfd_fence *new_fence;
+ struct list_head duplicate_save;
+ struct amdgpu_sync sync_obj;
+ unsigned long failed_size = 0;
+ unsigned long total_size = 0;
+ struct drm_exec exec;
+ int ret;
+
+ INIT_LIST_HEAD(&duplicate_save);
+
+ mutex_lock(&process_info->lock);
+
+ drm_exec_init(&exec, 0);
+ drm_exec_until_all_locked(&exec) {
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto ttm_reserve_fail;
+ }
+
+ /* Reserve all BOs and page tables/directory. Add all BOs from
+ * kfd_bo_list to ctx.list
+ */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto ttm_reserve_fail;
+ }
+ }
+
+ amdgpu_sync_create(&sync_obj);
+
+ /* Validate PDs and PTs */
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto validate_map_fail;
+
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
+ /* Validate BOs and map them to GPUVM (update VM page tables). */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+
+ struct amdgpu_bo *bo = mem->bo;
+ uint32_t domain = mem->domain;
+ struct kfd_mem_attachment *attachment;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ total_size += amdgpu_bo_size(bo);
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
+ if (ret) {
+ pr_debug("Memory eviction: Validate BOs failed\n");
+ failed_size += amdgpu_bo_size(bo);
+ ret = amdgpu_amdkfd_bo_validate(bo,
+ AMDGPU_GEM_DOMAIN_GTT, false);
+ if (ret) {
+ pr_debug("Memory eviction: Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ ret = amdgpu_sync_fence(&sync_obj, fence);
+ if (ret) {
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
+ continue;
+
+ if (attachment->bo_va->base.bo->tbo.pin_count)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PDs failed. Try again\n");
+ goto validate_map_fail;
+ }
+
+ /* Wait for validate and PT updates to finish */
+ amdgpu_sync_wait(&sync_obj, false);
+
+ /* Release old eviction fence and create new one, because fence only
+ * goes from unsignaled to signaled, fence cannot be reused.
+ * Use context and mm from the old fence.
+ */
+ new_fence = amdgpu_amdkfd_fence_create(
+ process_info->eviction_fence->base.context,
+ process_info->eviction_fence->mm,
+ NULL);
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ *ef = dma_fence_get(&new_fence->base);
+
+ /* Attach new eviction fence to all BOs except pinned ones */
+ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
+ if (mem->bo->tbo.pin_count)
+ continue;
+
+ dma_resv_add_fence(mem->bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ /* Attach eviction fence to PD / PT BOs */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *bo = peer_vm->root.bo;
+
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+
+validate_map_fail:
+ amdgpu_sync_free(&sync_obj);
+ttm_reserve_fail:
+ drm_exec_fini(&exec);
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+ int ret;
+
+ if (!info || !gws)
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ mutex_init(&(*mem)->lock);
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+ (*mem)->process_info = process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+
+ /* Validate gws bo the first time it is added to process */
+ mutex_lock(&(*mem)->process_info->lock);
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ goto bo_reservation_failure;
+ }
+
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+ if (ret) {
+ pr_err("GWS BO validate failed %d\n", ret);
+ goto bo_validation_failure;
+ }
+ /* GWS resource is shared b/t amdgpu and amdkfd
+ * Add process eviction fence to bo so they can
+ * evict each other.
+ */
+ ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ dma_resv_add_fence(gws_bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ amdgpu_bo_unreserve(gws_bo);
+ mutex_unlock(&(*mem)->process_info->lock);
+
+ return ret;
+
+reserve_shared_fail:
+bo_validation_failure:
+ amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+ mutex_unlock(&(*mem)->process_info->lock);
+ amdgpu_sync_free(&(*mem)->sync);
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ *mem = NULL;
+ return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+ int ret;
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+ /* Remove BO from process's validate list so restore worker won't touch
+ * it anymore
+ */
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ //TODO add BO back to validate_list?
+ return ret;
+ }
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+ process_info->eviction_fence);
+ amdgpu_bo_unreserve(gws_bo);
+ amdgpu_sync_free(&kgd_mem->sync);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&kgd_mem->lock);
+ kfree(mem);
+ return 0;
+}
+
+/* Returns GPU-specific tiling mode information */
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
+ struct tile_config *config)
+{
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ /* Those values are not set from GFX9 onwards */
+ config->num_banks = adev->gfx.config.num_banks;
+ config->num_ranks = adev->gfx.config.num_ranks;
+
+ return 0;
+}
+
+bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->is_mapped && entry->adev == adev)
+ return true;
+ }
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
+{
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ seq_printf(m, "System mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.system_mem_used >> 20),
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+ seq_printf(m, "TTM mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.ttm_mem_used >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
new file mode 100644
index 000000000..dce9e7d5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -0,0 +1,1888 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
+
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "bif/bif_4_1_d.h"
+
+static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev,
+ ATOM_GPIO_I2C_ASSIGMENT *gpio,
+ u8 index)
+{
+
+}
+
+static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
+{
+ struct amdgpu_i2c_bus_rec i2c;
+
+ memset(&i2c, 0, sizeof(struct amdgpu_i2c_bus_rec));
+
+ i2c.mask_clk_reg = le16_to_cpu(gpio->usClkMaskRegisterIndex);
+ i2c.mask_data_reg = le16_to_cpu(gpio->usDataMaskRegisterIndex);
+ i2c.en_clk_reg = le16_to_cpu(gpio->usClkEnRegisterIndex);
+ i2c.en_data_reg = le16_to_cpu(gpio->usDataEnRegisterIndex);
+ i2c.y_clk_reg = le16_to_cpu(gpio->usClkY_RegisterIndex);
+ i2c.y_data_reg = le16_to_cpu(gpio->usDataY_RegisterIndex);
+ i2c.a_clk_reg = le16_to_cpu(gpio->usClkA_RegisterIndex);
+ i2c.a_data_reg = le16_to_cpu(gpio->usDataA_RegisterIndex);
+ i2c.mask_clk_mask = (1 << gpio->ucClkMaskShift);
+ i2c.mask_data_mask = (1 << gpio->ucDataMaskShift);
+ i2c.en_clk_mask = (1 << gpio->ucClkEnShift);
+ i2c.en_data_mask = (1 << gpio->ucDataEnShift);
+ i2c.y_clk_mask = (1 << gpio->ucClkY_Shift);
+ i2c.y_data_mask = (1 << gpio->ucDataY_Shift);
+ i2c.a_clk_mask = (1 << gpio->ucClkA_Shift);
+ i2c.a_data_mask = (1 << gpio->ucDataA_Shift);
+
+ if (gpio->sucI2cId.sbfAccess.bfHW_Capable)
+ i2c.hw_capable = true;
+ else
+ i2c.hw_capable = false;
+
+ if (gpio->sucI2cId.ucAccess == 0xa0)
+ i2c.mm_i2c = true;
+ else
+ i2c.mm_i2c = false;
+
+ i2c.i2c_id = gpio->sucI2cId.ucAccess;
+
+ if (i2c.mask_clk_reg)
+ i2c.valid = true;
+ else
+ i2c.valid = false;
+
+ return i2c;
+}
+
+struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
+ uint8_t id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+
+ memset(&i2c, 0, sizeof(struct amdgpu_i2c_bus_rec));
+ i2c.valid = false;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+
+ amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
+
+ if (gpio->sucI2cId.ucAccess == id) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+
+ return i2c;
+}
+
+void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
+
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid) {
+ sprintf(stmp, "0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
+struct amdgpu_gpio_rec
+amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
+ u8 id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ struct amdgpu_gpio_rec gpio;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT);
+ struct _ATOM_GPIO_PIN_LUT *gpio_info;
+ ATOM_GPIO_PIN_ASSIGNMENT *pin;
+ u16 data_offset, size;
+ int i, num_indices;
+
+ memset(&gpio, 0, sizeof(struct amdgpu_gpio_rec));
+ gpio.valid = false;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ gpio_info = (struct _ATOM_GPIO_PIN_LUT *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_PIN_ASSIGNMENT);
+
+ pin = gpio_info->asGPIO_Pin;
+ for (i = 0; i < num_indices; i++) {
+ if (id == pin->ucGPIO_ID) {
+ gpio.id = pin->ucGPIO_ID;
+ gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex);
+ gpio.shift = pin->ucGpioPinBitShift;
+ gpio.mask = (1 << pin->ucGpioPinBitShift);
+ gpio.valid = true;
+ break;
+ }
+ pin = (ATOM_GPIO_PIN_ASSIGNMENT *)
+ ((u8 *)pin + sizeof(ATOM_GPIO_PIN_ASSIGNMENT));
+ }
+ }
+
+ return gpio;
+}
+
+static struct amdgpu_hpd
+amdgpu_atombios_get_hpd_info_from_gpio(struct amdgpu_device *adev,
+ struct amdgpu_gpio_rec *gpio)
+{
+ struct amdgpu_hpd hpd;
+ u32 reg;
+
+ memset(&hpd, 0, sizeof(struct amdgpu_hpd));
+
+ reg = amdgpu_display_hpd_get_gpio_reg(adev);
+
+ hpd.gpio = *gpio;
+ if (gpio->reg == reg) {
+ switch(gpio->mask) {
+ case (1 << 0):
+ hpd.hpd = AMDGPU_HPD_1;
+ break;
+ case (1 << 8):
+ hpd.hpd = AMDGPU_HPD_2;
+ break;
+ case (1 << 16):
+ hpd.hpd = AMDGPU_HPD_3;
+ break;
+ case (1 << 24):
+ hpd.hpd = AMDGPU_HPD_4;
+ break;
+ case (1 << 26):
+ hpd.hpd = AMDGPU_HPD_5;
+ break;
+ case (1 << 28):
+ hpd.hpd = AMDGPU_HPD_6;
+ break;
+ default:
+ hpd.hpd = AMDGPU_HPD_NONE;
+ break;
+ }
+ } else
+ hpd.hpd = AMDGPU_HPD_NONE;
+ return hpd;
+}
+
+static const int object_connector_convert[] = {
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_DVII,
+ DRM_MODE_CONNECTOR_DVII,
+ DRM_MODE_CONNECTOR_DVID,
+ DRM_MODE_CONNECTOR_DVID,
+ DRM_MODE_CONNECTOR_VGA,
+ DRM_MODE_CONNECTOR_Composite,
+ DRM_MODE_CONNECTOR_SVIDEO,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_9PinDIN,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_HDMIA,
+ DRM_MODE_CONNECTOR_HDMIB,
+ DRM_MODE_CONNECTOR_LVDS,
+ DRM_MODE_CONNECTOR_9PinDIN,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_DisplayPort,
+ DRM_MODE_CONNECTOR_eDP,
+ DRM_MODE_CONNECTOR_Unknown
+};
+
+bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 size, data_offset;
+ u8 frev, crev;
+ ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+ ATOM_OBJECT_HEADER *obj_header;
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+ return false;
+
+ if (crev < 2)
+ return false;
+
+ obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+ path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usDisplayPathTableOffset));
+
+ if (path_obj->ucNumOfDispPath)
+ return true;
+ else
+ return false;
+}
+
+bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 size, data_offset;
+ u8 frev, crev;
+ ATOM_CONNECTOR_OBJECT_TABLE *con_obj;
+ ATOM_ENCODER_OBJECT_TABLE *enc_obj;
+ ATOM_OBJECT_TABLE *router_obj;
+ ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+ ATOM_OBJECT_HEADER *obj_header;
+ int i, j, k, path_size, device_support;
+ int connector_type;
+ u16 conn_id, connector_object_id;
+ struct amdgpu_i2c_bus_rec ddc_bus;
+ struct amdgpu_router router;
+ struct amdgpu_gpio_rec gpio;
+ struct amdgpu_hpd hpd;
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+ return false;
+
+ if (crev < 2)
+ return false;
+
+ obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+ path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usDisplayPathTableOffset));
+ con_obj = (ATOM_CONNECTOR_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usConnectorObjectTableOffset));
+ enc_obj = (ATOM_ENCODER_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usEncoderObjectTableOffset));
+ router_obj = (ATOM_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usRouterObjectTableOffset));
+ device_support = le16_to_cpu(obj_header->usDeviceSupport);
+
+ path_size = 0;
+ for (i = 0; i < path_obj->ucNumOfDispPath; i++) {
+ uint8_t *addr = (uint8_t *) path_obj->asDispPath;
+ ATOM_DISPLAY_OBJECT_PATH *path;
+ addr += path_size;
+ path = (ATOM_DISPLAY_OBJECT_PATH *) addr;
+ path_size += le16_to_cpu(path->usSize);
+
+ if (device_support & le16_to_cpu(path->usDeviceTag)) {
+ uint8_t con_obj_id =
+ (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
+ >> OBJECT_ID_SHIFT;
+
+ /* Skip TV/CV support */
+ if ((le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_TV1_SUPPORT) ||
+ (le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_CV_SUPPORT))
+ continue;
+
+ if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+ DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+ con_obj_id, le16_to_cpu(path->usDeviceTag));
+ continue;
+ }
+
+ connector_type =
+ object_connector_convert[con_obj_id];
+ connector_object_id = con_obj_id;
+
+ if (connector_type == DRM_MODE_CONNECTOR_Unknown)
+ continue;
+
+ router.ddc_valid = false;
+ router.cd_valid = false;
+ for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
+ uint8_t grph_obj_type =
+ (le16_to_cpu(path->usGraphicObjIds[j]) &
+ OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+
+ if (grph_obj_type == GRAPH_OBJECT_TYPE_ENCODER) {
+ for (k = 0; k < enc_obj->ucNumberOfObjects; k++) {
+ u16 encoder_obj = le16_to_cpu(enc_obj->asObjects[k].usObjectID);
+ if (le16_to_cpu(path->usGraphicObjIds[j]) == encoder_obj) {
+ ATOM_COMMON_RECORD_HEADER *record = (ATOM_COMMON_RECORD_HEADER *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(enc_obj->asObjects[k].usRecordOffset));
+ ATOM_ENCODER_CAP_RECORD *cap_record;
+ u16 caps = 0;
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_ENCODER_CAP_RECORD_TYPE:
+ cap_record =(ATOM_ENCODER_CAP_RECORD *)
+ record;
+ caps = le16_to_cpu(cap_record->usEncoderCap);
+ break;
+ }
+ record = (ATOM_COMMON_RECORD_HEADER *)
+ ((char *)record + record->ucRecordSize);
+ }
+ amdgpu_display_add_encoder(adev, encoder_obj,
+ le16_to_cpu(path->usDeviceTag),
+ caps);
+ }
+ }
+ } else if (grph_obj_type == GRAPH_OBJECT_TYPE_ROUTER) {
+ for (k = 0; k < router_obj->ucNumberOfObjects; k++) {
+ u16 router_obj_id = le16_to_cpu(router_obj->asObjects[k].usObjectID);
+ if (le16_to_cpu(path->usGraphicObjIds[j]) == router_obj_id) {
+ ATOM_COMMON_RECORD_HEADER *record = (ATOM_COMMON_RECORD_HEADER *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(router_obj->asObjects[k].usRecordOffset));
+ ATOM_I2C_RECORD *i2c_record;
+ ATOM_I2C_ID_CONFIG_ACCESS *i2c_config;
+ ATOM_ROUTER_DDC_PATH_SELECT_RECORD *ddc_path;
+ ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD *cd_path;
+ ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *router_src_dst_table =
+ (ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(router_obj->asObjects[k].usSrcDstTableOffset));
+ u8 *num_dst_objs = (u8 *)
+ ((u8 *)router_src_dst_table + 1 +
+ (router_src_dst_table->ucNumberOfSrc * 2));
+ u16 *dst_objs = (u16 *)(num_dst_objs + 1);
+ int enum_id;
+
+ router.router_id = router_obj_id;
+ for (enum_id = 0; enum_id < (*num_dst_objs); enum_id++) {
+ if (le16_to_cpu(path->usConnObjectId) ==
+ le16_to_cpu(dst_objs[enum_id]))
+ break;
+ }
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_I2C_RECORD_TYPE:
+ i2c_record =
+ (ATOM_I2C_RECORD *)
+ record;
+ i2c_config =
+ (ATOM_I2C_ID_CONFIG_ACCESS *)
+ &i2c_record->sucI2cId;
+ router.i2c_info =
+ amdgpu_atombios_lookup_i2c_gpio(adev,
+ i2c_config->
+ ucAccess);
+ router.i2c_addr = i2c_record->ucI2CAddr >> 1;
+ break;
+ case ATOM_ROUTER_DDC_PATH_SELECT_RECORD_TYPE:
+ ddc_path = (ATOM_ROUTER_DDC_PATH_SELECT_RECORD *)
+ record;
+ router.ddc_valid = true;
+ router.ddc_mux_type = ddc_path->ucMuxType;
+ router.ddc_mux_control_pin = ddc_path->ucMuxControlPin;
+ router.ddc_mux_state = ddc_path->ucMuxState[enum_id];
+ break;
+ case ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD_TYPE:
+ cd_path = (ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD *)
+ record;
+ router.cd_valid = true;
+ router.cd_mux_type = cd_path->ucMuxType;
+ router.cd_mux_control_pin = cd_path->ucMuxControlPin;
+ router.cd_mux_state = cd_path->ucMuxState[enum_id];
+ break;
+ }
+ record = (ATOM_COMMON_RECORD_HEADER *)
+ ((char *)record + record->ucRecordSize);
+ }
+ }
+ }
+ }
+ }
+
+ /* look up gpio for ddc, hpd */
+ ddc_bus.valid = false;
+ hpd.hpd = AMDGPU_HPD_NONE;
+ if ((le16_to_cpu(path->usDeviceTag) &
+ (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) == 0) {
+ for (j = 0; j < con_obj->ucNumberOfObjects; j++) {
+ if (le16_to_cpu(path->usConnObjectId) ==
+ le16_to_cpu(con_obj->asObjects[j].
+ usObjectID)) {
+ ATOM_COMMON_RECORD_HEADER
+ *record =
+ (ATOM_COMMON_RECORD_HEADER
+ *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(con_obj->
+ asObjects[j].
+ usRecordOffset));
+ ATOM_I2C_RECORD *i2c_record;
+ ATOM_HPD_INT_RECORD *hpd_record;
+ ATOM_I2C_ID_CONFIG_ACCESS *i2c_config;
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_I2C_RECORD_TYPE:
+ i2c_record =
+ (ATOM_I2C_RECORD *)
+ record;
+ i2c_config =
+ (ATOM_I2C_ID_CONFIG_ACCESS *)
+ &i2c_record->sucI2cId;
+ ddc_bus = amdgpu_atombios_lookup_i2c_gpio(adev,
+ i2c_config->
+ ucAccess);
+ break;
+ case ATOM_HPD_INT_RECORD_TYPE:
+ hpd_record =
+ (ATOM_HPD_INT_RECORD *)
+ record;
+ gpio = amdgpu_atombios_lookup_gpio(adev,
+ hpd_record->ucHPDIntGPIOID);
+ hpd = amdgpu_atombios_get_hpd_info_from_gpio(adev, &gpio);
+ hpd.plugged_state = hpd_record->ucPlugged_PinState;
+ break;
+ }
+ record =
+ (ATOM_COMMON_RECORD_HEADER
+ *) ((char *)record
+ +
+ record->
+ ucRecordSize);
+ }
+ break;
+ }
+ }
+ }
+
+ /* needed for aux chan transactions */
+ ddc_bus.hpd = hpd.hpd;
+
+ conn_id = le16_to_cpu(path->usConnObjectId);
+
+ amdgpu_display_add_connector(adev,
+ conn_id,
+ le16_to_cpu(path->usDeviceTag),
+ connector_type, &ddc_bus,
+ connector_object_id,
+ &hpd,
+ &router);
+
+ }
+ }
+
+ amdgpu_link_encoder_connector(adev_to_drm(adev));
+
+ return true;
+}
+
+union firmware_info {
+ ATOM_FIRMWARE_INFO info;
+ ATOM_FIRMWARE_INFO_V1_2 info_12;
+ ATOM_FIRMWARE_INFO_V1_3 info_13;
+ ATOM_FIRMWARE_INFO_V1_4 info_14;
+ ATOM_FIRMWARE_INFO_V2_1 info_21;
+ ATOM_FIRMWARE_INFO_V2_2 info_22;
+};
+
+int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ int i;
+ struct amdgpu_pll *ppll = &adev->clock.ppll[0];
+ struct amdgpu_pll *spll = &adev->clock.spll;
+ struct amdgpu_pll *mpll = &adev->clock.mpll;
+ union firmware_info *firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+ /* pixel clocks */
+ ppll->reference_freq =
+ le16_to_cpu(firmware_info->info.usReferenceClock);
+ ppll->reference_div = 0;
+
+ ppll->pll_out_min =
+ le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+ ppll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
+
+ ppll->lcd_pll_out_min =
+ le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+ if (ppll->lcd_pll_out_min == 0)
+ ppll->lcd_pll_out_min = ppll->pll_out_min;
+ ppll->lcd_pll_out_max =
+ le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+ if (ppll->lcd_pll_out_max == 0)
+ ppll->lcd_pll_out_max = ppll->pll_out_max;
+
+ if (ppll->pll_out_min == 0)
+ ppll->pll_out_min = 64800;
+
+ ppll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Input);
+ ppll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxPixelClockPLL_Input);
+
+ ppll->min_post_div = 2;
+ ppll->max_post_div = 0x7f;
+ ppll->min_frac_feedback_div = 0;
+ ppll->max_frac_feedback_div = 9;
+ ppll->min_ref_div = 2;
+ ppll->max_ref_div = 0x3ff;
+ ppll->min_feedback_div = 4;
+ ppll->max_feedback_div = 0xfff;
+ ppll->best_vco = 0;
+
+ for (i = 1; i < AMDGPU_MAX_PPLL; i++)
+ adev->clock.ppll[i] = *ppll;
+
+ /* system clock */
+ spll->reference_freq =
+ le16_to_cpu(firmware_info->info_21.usCoreReferenceClock);
+ spll->reference_div = 0;
+
+ spll->pll_out_min =
+ le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Output);
+ spll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxEngineClockPLL_Output);
+
+ /* ??? */
+ if (spll->pll_out_min == 0)
+ spll->pll_out_min = 64800;
+
+ spll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Input);
+ spll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxEngineClockPLL_Input);
+
+ spll->min_post_div = 1;
+ spll->max_post_div = 1;
+ spll->min_ref_div = 2;
+ spll->max_ref_div = 0xff;
+ spll->min_feedback_div = 4;
+ spll->max_feedback_div = 0xff;
+ spll->best_vco = 0;
+
+ /* memory clock */
+ mpll->reference_freq =
+ le16_to_cpu(firmware_info->info_21.usMemoryReferenceClock);
+ mpll->reference_div = 0;
+
+ mpll->pll_out_min =
+ le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Output);
+ mpll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxMemoryClockPLL_Output);
+
+ /* ??? */
+ if (mpll->pll_out_min == 0)
+ mpll->pll_out_min = 64800;
+
+ mpll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Input);
+ mpll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxMemoryClockPLL_Input);
+
+ adev->clock.default_sclk =
+ le32_to_cpu(firmware_info->info.ulDefaultEngineClock);
+ adev->clock.default_mclk =
+ le32_to_cpu(firmware_info->info.ulDefaultMemoryClock);
+
+ mpll->min_post_div = 1;
+ mpll->max_post_div = 1;
+ mpll->min_ref_div = 2;
+ mpll->max_ref_div = 0xff;
+ mpll->min_feedback_div = 4;
+ mpll->max_feedback_div = 0xff;
+ mpll->best_vco = 0;
+
+ /* disp clock */
+ adev->clock.default_dispclk =
+ le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
+ /* set a reasonable default for DP */
+ if (adev->clock.default_dispclk < 53900) {
+ DRM_DEBUG("Changing default dispclk from %dMhz to 600Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 60000;
+ } else if (adev->clock.default_dispclk <= 60000) {
+ DRM_DEBUG("Changing default dispclk from %dMhz to 625Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 62500;
+ }
+ adev->clock.dp_extclk =
+ le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
+ adev->clock.current_dispclk = adev->clock.default_dispclk;
+
+ adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
+ if (adev->clock.max_pixel_clock == 0)
+ adev->clock.max_pixel_clock = 40000;
+
+ /* not technically a clock, but... */
+ adev->mode_info.firmware_flags =
+ le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
+
+ ret = 0;
+ }
+
+ adev->pm.current_sclk = adev->clock.default_sclk;
+ adev->pm.current_mclk = adev->clock.default_mclk;
+
+ return ret;
+}
+
+union gfx_info {
+ ATOM_GFX_INFO_V2_1 info;
+};
+
+int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, GFX_Info);
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ adev->gfx.config.max_shader_engines = gfx_info->info.max_shader_engines;
+ adev->gfx.config.max_tile_pipes = gfx_info->info.max_tile_pipes;
+ adev->gfx.config.max_cu_per_sh = gfx_info->info.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->info.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->info.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches =
+ gfx_info->info.max_texture_channel_caches;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+union igp_info {
+ struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9 info_9;
+};
+
+/*
+ * Return vram width from integrated system info table, if available,
+ * or 0 if not.
+ */
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ u8 frev, crev;
+
+ /* get any igp specific overrides */
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 8:
+ case 9:
+ return igp_info->info_8.ucUMAChannelNumber * 64;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_atombios_get_igp_ss_overrides(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ u8 frev, crev;
+ u16 percentage = 0, rate = 0;
+
+ /* get any igp specific overrides */
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 6:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_6.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_6.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_6.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 7:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_7.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_7.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_7.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 8:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_8.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_8.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_8.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 9:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_9.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_9.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_9.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev);
+ break;
+ }
+ if (percentage)
+ ss->percentage = percentage;
+ if (rate)
+ ss->rate = rate;
+ }
+}
+
+union asic_ss_info {
+ struct _ATOM_ASIC_INTERNAL_SS_INFO info;
+ struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 info_2;
+ struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3;
+};
+
+union asic_ss_assignment {
+ struct _ATOM_ASIC_SS_ASSIGNMENT v1;
+ struct _ATOM_ASIC_SS_ASSIGNMENT_V2 v2;
+ struct _ATOM_ASIC_SS_ASSIGNMENT_V3 v3;
+};
+
+bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id, u32 clock)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
+ uint16_t data_offset, size;
+ union asic_ss_info *ss_info;
+ union asic_ss_assignment *ss_assign;
+ uint8_t frev, crev;
+ int i, num_indices;
+
+ if (id == ASIC_INTERNAL_MEMORY_SS) {
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT))
+ return false;
+ }
+ if (id == ASIC_INTERNAL_ENGINE_SS) {
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT))
+ return false;
+ }
+
+ memset(ss, 0, sizeof(struct amdgpu_atom_ss));
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+
+ ss_info =
+ (union asic_ss_info *)(mode_info->atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 1:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT);
+
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v1.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v1.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v1.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v1.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v1.usSpreadRateInKhz);
+ ss->percentage_divider = 100;
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT));
+ }
+ break;
+ case 2:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2);
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_2.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v2.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v2.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v2.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v2.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v2.usSpreadRateIn10Hz);
+ ss->percentage_divider = 100;
+ if ((crev == 2) &&
+ ((id == ASIC_INTERNAL_ENGINE_SS) ||
+ (id == ASIC_INTERNAL_MEMORY_SS)))
+ ss->rate /= 100;
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2));
+ }
+ break;
+ case 3:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_3.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v3.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v3.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v3.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v3.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v3.usSpreadRateIn10Hz);
+ if (ss_assign->v3.ucSpreadSpectrumMode &
+ SS_MODE_V3_PERCENTAGE_DIV_BY_1000_MASK)
+ ss->percentage_divider = 1000;
+ else
+ ss->percentage_divider = 100;
+ if ((id == ASIC_INTERNAL_ENGINE_SS) ||
+ (id == ASIC_INTERNAL_MEMORY_SS))
+ ss->rate /= 100;
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_atombios_get_igp_ss_overrides(adev, ss, id);
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3));
+ }
+ break;
+ default:
+ DRM_ERROR("Unsupported ASIC_InternalSS_Info table: %d %d\n", frev, crev);
+ break;
+ }
+
+ }
+ return false;
+}
+
+union get_clock_dividers {
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS v1;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 v2;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 v3;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 v4;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 v5;
+ struct _COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_6 v6_in;
+ struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 v6_out;
+};
+
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers)
+{
+ union get_clock_dividers args;
+ int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL);
+ u8 frev, crev;
+
+ memset(&args, 0, sizeof(args));
+ memset(dividers, 0, sizeof(struct atom_clock_dividers));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 2:
+ case 3:
+ case 5:
+ /* r6xx, r7xx, evergreen, ni, si.
+ * TODO: add support for asic_type <= CHIP_RV770*/
+ if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
+ args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v3.ucPostDiv;
+ dividers->enable_post_div = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v3.ucRefDiv;
+ dividers->vco_mode = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ } else {
+ /* for SI we use ComputeMemoryClockParam for memory plls */
+ if (adev->asic_type >= CHIP_TAHITI)
+ return -EINVAL;
+ args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+ if (strobe_mode)
+ args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v5.ucPostDiv;
+ dividers->enable_post_div = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v5.ucRefDiv;
+ dividers->vco_mode = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ }
+ break;
+ case 4:
+ /* fusion */
+ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
+ dividers->real_clock = le32_to_cpu(args.v4.ulClock);
+ break;
+ case 6:
+ /* CI */
+ /* COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, COMPUTE_GPUCLK_INPUT_FLAG_SCLK */
+ args.v6_in.ulClock.ulComputeClockFlag = clock_type;
+ args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v6_out.ucPllRefDiv;
+ dividers->post_div = args.v6_out.ucPllPostDiv;
+ dividers->flags = args.v6_out.ucPllCntlFlag;
+ dividers->real_clock = le32_to_cpu(args.v6_out.ulClock.ulClock);
+ dividers->post_divider = args.v6_out.ulClock.ucPostDiv;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_mpll_param *mpll_param)
+{
+ COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 args;
+ int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam);
+ u8 frev, crev;
+
+ memset(&args, 0, sizeof(args));
+ memset(mpll_param, 0, sizeof(struct atom_mpll_param));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (frev) {
+ case 2:
+ switch (crev) {
+ case 1:
+ /* SI */
+ args.ulClock = cpu_to_le32(clock); /* 10 khz */
+ args.ucInputFlag = 0;
+ if (strobe_mode)
+ args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
+ mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
+ mpll_param->post_div = args.ucPostDiv;
+ mpll_param->dll_speed = args.ucDllSpeed;
+ mpll_param->bwcntl = args.ucBWCntl;
+ mpll_param->vco_mode =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_VCO_MODE_MASK);
+ mpll_param->yclk_sel =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_BYPASS_DQ_PLL) ? 1 : 0;
+ mpll_param->qdr =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_QDR_ENABLE) ? 1 : 0;
+ mpll_param->half_rate =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_AD_HALF_RATE) ? 1 : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
+{
+ SET_ENGINE_CLOCK_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
+ u32 tmp;
+
+ memset(&args, 0, sizeof(args));
+
+ tmp = eng_clock & SET_CLOCK_FREQ_MASK;
+ tmp |= (COMPUTE_ENGINE_PLL_PARAM << 24);
+
+ args.ulTargetEngineClock = cpu_to_le32(tmp);
+ if (mem_clock)
+ args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ u8 frev, crev;
+ u16 data_offset;
+ union firmware_info *firmware_info;
+
+ *vddc = 0;
+ *vddci = 0;
+ *mvdd = 0;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+ *vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
+ if ((frev == 2) && (crev >= 2)) {
+ *vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
+ *mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
+ }
+ }
+}
+
+union set_voltage {
+ struct _SET_VOLTAGE_PS_ALLOCATION alloc;
+ struct _SET_VOLTAGE_PARAMETERS v1;
+ struct _SET_VOLTAGE_PARAMETERS_V2 v2;
+ struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
+};
+
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage)
+{
+ union set_voltage args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
+ u8 frev, crev;
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 1:
+ return -EINVAL;
+ case 2:
+ args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
+ args.v2.ucVoltageMode = 0;
+ args.v2.usVoltageLevel = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ *voltage = le16_to_cpu(args.v2.usVoltageLevel);
+ break;
+ case 3:
+ args.v3.ucVoltageType = voltage_type;
+ args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
+ args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ *voltage = le16_to_cpu(args.v3.usVoltageLevel);
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx)
+{
+ return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
+}
+
+union voltage_object_info {
+ struct _ATOM_VOLTAGE_OBJECT_INFO v1;
+ struct _ATOM_VOLTAGE_OBJECT_INFO_V2 v2;
+ struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1 v3;
+};
+
+union voltage_object {
+ struct _ATOM_VOLTAGE_OBJECT v1;
+ struct _ATOM_VOLTAGE_OBJECT_V2 v2;
+ union _ATOM_VOLTAGE_OBJECT_V3 v3;
+};
+
+
+static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_atombios_lookup_voltage_object_v3(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *v3,
+ u8 voltage_type, u8 voltage_mode)
+{
+ u32 size = le16_to_cpu(v3->sHeader.usStructureSize);
+ u32 offset = offsetof(ATOM_VOLTAGE_OBJECT_INFO_V3_1, asVoltageObj[0]);
+ u8 *start = (u8 *)v3;
+
+ while (offset < size) {
+ ATOM_VOLTAGE_OBJECT_V3 *vo = (ATOM_VOLTAGE_OBJECT_V3 *)(start + offset);
+ if ((vo->asGpioVoltageObj.sHeader.ucVoltageType == voltage_type) &&
+ (vo->asGpioVoltageObj.sHeader.ucVoltageMode == voltage_mode))
+ return vo;
+ offset += le16_to_cpu(vo->asGpioVoltageObj.sHeader.usSize);
+ }
+ return NULL;
+}
+
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ union voltage_object_info *voltage_info;
+ union voltage_object *voltage_object = NULL;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ voltage_object = (union voltage_object *)
+ amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type,
+ VOLTAGE_OBJ_SVID2);
+ if (voltage_object) {
+ *svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
+ *svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
+ } else {
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+
+ }
+ return 0;
+}
+
+bool
+amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ union voltage_object_info *voltage_info;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ if (amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type, voltage_mode))
+ return true;
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return false;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return false;
+ }
+
+ }
+ return false;
+}
+
+int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode,
+ struct atom_voltage_table *voltage_table)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ int i;
+ union voltage_object_info *voltage_info;
+ union voltage_object *voltage_object = NULL;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ voltage_object = (union voltage_object *)
+ amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type, voltage_mode);
+ if (voltage_object) {
+ ATOM_GPIO_VOLTAGE_OBJECT_V3 *gpio =
+ &voltage_object->v3.asGpioVoltageObj;
+ VOLTAGE_LUT_ENTRY_V2 *lut;
+ if (gpio->ucGpioEntryNum > MAX_VOLTAGE_ENTRIES)
+ return -EINVAL;
+ lut = &gpio->asVolGpioLut[0];
+ for (i = 0; i < gpio->ucGpioEntryNum; i++) {
+ voltage_table->entries[i].value =
+ le16_to_cpu(lut->usVoltageValue);
+ voltage_table->entries[i].smio_low =
+ le32_to_cpu(lut->ulVoltageId);
+ lut = (VOLTAGE_LUT_ENTRY_V2 *)
+ ((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY_V2));
+ }
+ voltage_table->mask_low = le32_to_cpu(gpio->ulGpioMaskVal);
+ voltage_table->count = gpio->ucGpioEntryNum;
+ voltage_table->phase_delay = gpio->ucPhaseDelay;
+ return 0;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ }
+ return -EINVAL;
+}
+
+union vram_info {
+ struct _ATOM_VRAM_INFO_V3 v1_3;
+ struct _ATOM_VRAM_INFO_V4 v1_4;
+ struct _ATOM_VRAM_INFO_HEADER_V2_1 v2_1;
+};
+
+#define MEM_ID_MASK 0xff000000
+#define MEM_ID_SHIFT 24
+#define CLOCK_RANGE_MASK 0x00ffffff
+#define CLOCK_RANGE_SHIFT 0
+#define LOW_NIBBLE_MASK 0xf
+#define DATA_EQU_PREV 0
+#define DATA_FROM_TABLE 4
+
+int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
+ u8 module_index,
+ struct atom_mc_reg_table *reg_table)
+{
+ int index = GetIndexIntoMasterTable(DATA, VRAM_Info);
+ u8 frev, crev, num_entries, t_mem_id, num_ranges = 0;
+ u32 i = 0, j;
+ u16 data_offset, size;
+ union vram_info *vram_info;
+
+ memset(reg_table, 0, sizeof(struct atom_mc_reg_table));
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ vram_info = (union vram_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+ switch (frev) {
+ case 1:
+ DRM_ERROR("old table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ case 2:
+ switch (crev) {
+ case 1:
+ if (module_index < vram_info->v2_1.ucNumOfVRAMModule) {
+ ATOM_INIT_REG_BLOCK *reg_block =
+ (ATOM_INIT_REG_BLOCK *)
+ ((u8 *)vram_info + le16_to_cpu(vram_info->v2_1.usMemClkPatchTblOffset));
+ ATOM_MEMORY_SETTING_DATA_BLOCK *reg_data =
+ (ATOM_MEMORY_SETTING_DATA_BLOCK *)
+ ((u8 *)reg_block + (2 * sizeof(u16)) +
+ le16_to_cpu(reg_block->usRegIndexTblSize));
+ ATOM_INIT_REG_INDEX_FORMAT *format = &reg_block->asRegIndexBuf[0];
+ num_entries = (u8)((le16_to_cpu(reg_block->usRegIndexTblSize)) /
+ sizeof(ATOM_INIT_REG_INDEX_FORMAT)) - 1;
+ if (num_entries > VBIOS_MC_REGISTER_ARRAY_SIZE)
+ return -EINVAL;
+ while (i < num_entries) {
+ if (format->ucPreRegDataLength & ACCESS_PLACEHOLDER)
+ break;
+ reg_table->mc_reg_address[i].s1 =
+ (u16)(le16_to_cpu(format->usRegIndex));
+ reg_table->mc_reg_address[i].pre_reg_data =
+ (u8)(format->ucPreRegDataLength);
+ i++;
+ format = (ATOM_INIT_REG_INDEX_FORMAT *)
+ ((u8 *)format + sizeof(ATOM_INIT_REG_INDEX_FORMAT));
+ }
+ reg_table->last = i;
+ while ((le32_to_cpu(*(u32 *)reg_data) != END_OF_REG_DATA_BLOCK) &&
+ (num_ranges < VBIOS_MAX_AC_TIMING_ENTRIES)) {
+ t_mem_id = (u8)((le32_to_cpu(*(u32 *)reg_data) & MEM_ID_MASK)
+ >> MEM_ID_SHIFT);
+ if (module_index == t_mem_id) {
+ reg_table->mc_reg_table_entry[num_ranges].mclk_max =
+ (u32)((le32_to_cpu(*(u32 *)reg_data) & CLOCK_RANGE_MASK)
+ >> CLOCK_RANGE_SHIFT);
+ for (i = 0, j = 1; i < reg_table->last; i++) {
+ if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_FROM_TABLE) {
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
+ (u32)le32_to_cpu(*((u32 *)reg_data + j));
+ j++;
+ } else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
+ }
+ }
+ num_ranges++;
+ }
+ reg_data = (ATOM_MEMORY_SETTING_DATA_BLOCK *)
+ ((u8 *)reg_data + le16_to_cpu(reg_block->usRegDataBlkSize));
+ }
+ if (le32_to_cpu(*(u32 *)reg_data) != END_OF_REG_DATA_BLOCK)
+ return -EINVAL;
+ reg_table->num_entries = num_ranges;
+ } else
+ return -EINVAL;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+ return 0;
+ }
+ return -EINVAL;
+}
+#endif
+
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
+{
+ int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset))
+ return true;
+
+ return false;
+}
+
+void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
+{
+ uint32_t bios_6_scratch;
+
+ bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
+
+ if (lock) {
+ bios_6_scratch |= ATOM_S6_CRITICAL_STATE;
+ bios_6_scratch &= ~ATOM_S6_ACC_MODE;
+ } else {
+ bios_6_scratch &= ~ATOM_S6_CRITICAL_STATE;
+ bios_6_scratch |= ATOM_S6_ACC_MODE;
+ }
+
+ WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
+}
+
+static void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
+{
+ uint32_t bios_2_scratch, bios_6_scratch;
+
+ adev->bios_scratch_reg_offset = mmBIOS_SCRATCH_0;
+
+ bios_2_scratch = RREG32(adev->bios_scratch_reg_offset + 2);
+ bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
+
+ /* let the bios control the backlight */
+ bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE;
+
+ /* tell the bios not to handle mode switching */
+ bios_6_scratch |= ATOM_S6_ACC_BLOCK_DISPLAY_SWITCH;
+
+ /* clear the vbios dpms state */
+ bios_2_scratch &= ~ATOM_S2_DEVICE_DPMS_STATE;
+
+ WREG32(adev->bios_scratch_reg_offset + 2, bios_2_scratch);
+ WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
+}
+
+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+ bool hung)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
+
+ if (hung)
+ tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+ else
+ tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+
+ WREG32(adev->bios_scratch_reg_offset + 3, tmp);
+}
+
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2);
+
+ tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+ ATOM_S2_CURRENT_BL_LEVEL_MASK;
+
+ WREG32(adev->bios_scratch_reg_offset + 2, tmp);
+}
+
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
+
+ if (tmp & ATOM_S7_ASIC_INIT_COMPLETE_MASK)
+ return false;
+ else
+ return true;
+}
+
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
+ */
+void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+{
+#ifdef __BIG_ENDIAN
+ u32 src_tmp[5], dst_tmp[5];
+ int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
+
+ if (to_le) {
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
+ } else {
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
+ }
+#else
+ memcpy(dst, src, num_bytes);
+#endif
+}
+
+static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
+ uint16_t data_offset;
+ int usage_bytes = 0;
+ struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
+ u64 start_addr;
+ u64 size;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
+
+ DRM_DEBUG("atom firmware requested %08x %dkb\n",
+ le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
+ le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
+
+ start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
+ size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
+
+ if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = size << 10;
+ /* Use the default scratch size */
+ usage_bytes = 0;
+ } else {
+ usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+ }
+ }
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
+
+/* ATOM accessor methods */
+/*
+ * ATOM is an interpreted byte code stored in tables in the vbios. The
+ * driver registers callbacks to access registers and the interpreter
+ * in the driver parses the tables and executes then to program specific
+ * actions (set display modes, asic init, etc.). See amdgpu_atombios.c,
+ * atombios.h, and atom.c
+ */
+
+/**
+ * cail_pll_read - read PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the PLL register.
+ */
+static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
+{
+ return 0;
+}
+
+/**
+ * cail_pll_write - write PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_mc_read - read MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MC register.
+ */
+static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
+{
+ return 0;
+}
+
+/**
+ * cail_mc_write - write MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MC register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_reg_write - write MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MMIO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = drm_to_adev(info->dev);
+
+ WREG32(reg, val);
+}
+
+/**
+ * cail_reg_read - read MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register.
+ */
+static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
+{
+ struct amdgpu_device *adev = drm_to_adev(info->dev);
+ uint32_t r;
+
+ r = RREG32(reg);
+ return r;
+}
+
+static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
+}
+
+static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
+ NULL);
+
+static struct attribute *amdgpu_vbios_version_attrs[] = {
+ &dev_attr_vbios_version.attr,
+ NULL
+};
+
+const struct attribute_group amdgpu_vbios_version_attr_group = {
+ .attrs = amdgpu_vbios_version_attrs
+};
+
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context)
+ return devm_device_add_group(adev->dev,
+ &amdgpu_vbios_version_attr_group);
+
+ return 0;
+}
+
+/**
+ * amdgpu_atombios_fini - free the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the driver info and register access callbacks for the ATOM
+ * interpreter (r4xx+).
+ * Called at driver shutdown.
+ */
+void amdgpu_atombios_fini(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context) {
+ kfree(adev->mode_info.atom_context->scratch);
+ kfree(adev->mode_info.atom_context->iio);
+ }
+ kfree(adev->mode_info.atom_context);
+ adev->mode_info.atom_context = NULL;
+ kfree(adev->mode_info.atom_card_info);
+ adev->mode_info.atom_card_info = NULL;
+}
+
+/**
+ * amdgpu_atombios_init - init the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes the driver info and register access callbacks for the
+ * ATOM interpreter (r4xx+).
+ * Returns 0 on sucess, -ENOMEM on failure.
+ * Called at driver startup.
+ */
+int amdgpu_atombios_init(struct amdgpu_device *adev)
+{
+ struct card_info *atom_card_info =
+ kzalloc(sizeof(struct card_info), GFP_KERNEL);
+
+ if (!atom_card_info)
+ return -ENOMEM;
+
+ adev->mode_info.atom_card_info = atom_card_info;
+ atom_card_info->dev = adev_to_drm(adev);
+ atom_card_info->reg_read = cail_reg_read;
+ atom_card_info->reg_write = cail_reg_write;
+ atom_card_info->mc_read = cail_mc_read;
+ atom_card_info->mc_write = cail_mc_write;
+ atom_card_info->pll_read = cail_pll_read;
+ atom_card_info->pll_write = cail_pll_write;
+
+ adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios);
+ if (!adev->mode_info.atom_context) {
+ amdgpu_atombios_fini(adev);
+ return -ENOMEM;
+ }
+
+ mutex_init(&adev->mode_info.atom_context->mutex);
+ if (adev->is_atom_fw) {
+ amdgpu_atomfirmware_scratch_regs_init(adev);
+ amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ /* cached firmware_flags for further usage */
+ adev->mode_info.firmware_flags =
+ amdgpu_atomfirmware_query_firmware_capability(adev);
+ } else {
+ amdgpu_atombios_scratch_regs_init(adev);
+ amdgpu_atombios_allocate_fb_scratch(adev);
+ }
+
+ return 0;
+}
+
+int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
+ uint32_t table,
+ uint16_t *size,
+ uint8_t *frev,
+ uint8_t *crev,
+ uint8_t **addr)
+{
+ uint16_t data_start;
+
+ if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, table,
+ size, frev, crev, &data_start))
+ return -EINVAL;
+
+ *addr = (uint8_t *)adev->mode_info.atom_context->bios + data_start;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
new file mode 100644
index 000000000..0811474e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ATOMBIOS_H__
+#define __AMDGPU_ATOMBIOS_H__
+
+struct atom_clock_dividers {
+ u32 post_div;
+ union {
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 reserved : 6;
+ u32 whole_fb_div : 12;
+ u32 frac_fb_div : 14;
+#else
+ u32 frac_fb_div : 14;
+ u32 whole_fb_div : 12;
+ u32 reserved : 6;
+#endif
+ };
+ u32 fb_div;
+ };
+ u32 ref_div;
+ bool enable_post_div;
+ bool enable_dithen;
+ u32 vco_mode;
+ u32 real_clock;
+ /* added for CI */
+ u32 post_divider;
+ u32 flags;
+};
+
+struct atom_mpll_param {
+ union {
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 reserved : 8;
+ u32 clkfrac : 12;
+ u32 clkf : 12;
+#else
+ u32 clkf : 12;
+ u32 clkfrac : 12;
+ u32 reserved : 8;
+#endif
+ };
+ u32 fb_div;
+ };
+ u32 post_div;
+ u32 bwcntl;
+ u32 dll_speed;
+ u32 vco_mode;
+ u32 yclk_sel;
+ u32 qdr;
+ u32 half_rate;
+};
+
+#define MEM_TYPE_GDDR5 0x50
+#define MEM_TYPE_GDDR4 0x40
+#define MEM_TYPE_GDDR3 0x30
+#define MEM_TYPE_DDR2 0x20
+#define MEM_TYPE_GDDR1 0x10
+#define MEM_TYPE_DDR3 0xb0
+#define MEM_TYPE_MASK 0xf0
+
+struct atom_memory_info {
+ u8 mem_vendor;
+ u8 mem_type;
+};
+
+#define MAX_AC_TIMING_ENTRIES 16
+
+struct atom_memory_clock_range_table {
+ u8 num_entries;
+ u8 rsv[3];
+ u32 mclk[MAX_AC_TIMING_ENTRIES];
+};
+
+#define VBIOS_MC_REGISTER_ARRAY_SIZE 32
+#define VBIOS_MAX_AC_TIMING_ENTRIES 20
+
+struct atom_mc_reg_entry {
+ u32 mclk_max;
+ u32 mc_data[VBIOS_MC_REGISTER_ARRAY_SIZE];
+};
+
+struct atom_mc_register_address {
+ u16 s1;
+ u8 pre_reg_data;
+};
+
+struct atom_mc_reg_table {
+ u8 last;
+ u8 num_entries;
+ struct atom_mc_reg_entry mc_reg_table_entry[VBIOS_MAX_AC_TIMING_ENTRIES];
+ struct atom_mc_register_address mc_reg_address[VBIOS_MC_REGISTER_ARRAY_SIZE];
+};
+
+#define MAX_VOLTAGE_ENTRIES 32
+
+struct atom_voltage_table_entry {
+ u16 value;
+ u32 smio_low;
+};
+
+struct atom_voltage_table {
+ u32 count;
+ u32 mask_low;
+ u32 phase_delay;
+ struct atom_voltage_table_entry entries[MAX_VOLTAGE_ENTRIES];
+};
+
+struct amdgpu_gpio_rec
+amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
+ u8 id);
+
+struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
+ uint8_t id);
+void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+
+bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
+
+bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev);
+
+bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id, u32 clock);
+
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers);
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_mpll_param *mpll_param);
+
+void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
+
+bool
+amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode);
+
+int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode,
+ struct atom_voltage_table *voltage_table);
+
+int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
+ u8 module_index,
+ struct atom_mc_reg_table *reg_table);
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage);
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx);
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd);
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id);
+#endif
+
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
+
+void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+ bool hung);
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level);
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
+
+void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers);
+
+int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
+ uint32_t table,
+ uint16_t *size,
+ uint8_t *frev,
+ uint8_t *crev,
+ uint8_t **addr);
+
+void amdgpu_atombios_fini(struct amdgpu_device *adev);
+int amdgpu_atombios_init(struct amdgpu_device *adev);
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
new file mode 100644
index 000000000..fb2681dd6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -0,0 +1,945 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atomfirmware.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
+#include "atombios.h"
+#include "soc15_hw_ip.h"
+
+union firmware_info {
+ struct atom_firmware_info_v3_1 v31;
+ struct atom_firmware_info_v3_2 v32;
+ struct atom_firmware_info_v3_3 v33;
+ struct atom_firmware_info_v3_4 v34;
+};
+
+/*
+ * Helper function to query firmware capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return firmware_capability in firmwareinfo table on success or 0 if not
+ */
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union firmware_info *firmware_info;
+ u8 frev, crev;
+ u32 fw_cap = 0;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
+ index, &size, &frev, &crev, &data_offset)) {
+ /* support firmware_info 3.1 + */
+ if ((frev == 3 && crev >= 1) || (frev > 3)) {
+ firmware_info = (union firmware_info *)
+ (mode_info->atom_context->bios + data_offset);
+ fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability);
+ }
+ }
+
+ return fw_cap;
+}
+
+/*
+ * Helper function to query gpu virtualizaiton capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if gpu virtualization is supported or false if not
+ */
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) ? true : false;
+}
+
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
+{
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ uint16_t data_offset;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ adev->bios_scratch_reg_offset =
+ le32_to_cpu(firmware_info->bios_scratch_reg_startaddr);
+ }
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+ u32 start_addr, fw_size, drv_size;
+
+ start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+ drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+ DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+ start_addr,
+ fw_size,
+ drv_size);
+
+ if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ /* Use the default scratch size */
+ *usage_bytes = 0;
+ } else {
+ *usage_bytes = drv_size << 10;
+ }
+ return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+ u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+ fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+ drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+ drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+ DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+ fw_start_addr,
+ fw_size,
+ drv_start_addr,
+ drv_size);
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* Firmware request VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ }
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* driver request VRAM reservation for SR-IOV */
+ adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.drv_vram_usage_size = drv_size << 10;
+ }
+
+ *usage_bytes = 0;
+ return 0;
+}
+
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_usagebyfirmware);
+ struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+ struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
+ u16 data_offset;
+ u8 frev, crev;
+ int usage_bytes = 0;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
+ }
+
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
+
+union igp_info {
+ struct atom_integrated_system_info_v1_11 v11;
+ struct atom_integrated_system_info_v1_12 v12;
+ struct atom_integrated_system_info_v2_1 v21;
+};
+
+union umc_info {
+ struct atom_umc_info_v3_1 v31;
+ struct atom_umc_info_v3_2 v32;
+ struct atom_umc_info_v3_3 v33;
+ struct atom_umc_info_v4_0 v40;
+};
+
+union vram_info {
+ struct atom_vram_info_header_v2_3 v23;
+ struct atom_vram_info_header_v2_4 v24;
+ struct atom_vram_info_header_v2_5 v25;
+ struct atom_vram_info_header_v2_6 v26;
+ struct atom_vram_info_header_v3_0 v30;
+};
+
+union vram_module {
+ struct atom_vram_module_v9 v9;
+ struct atom_vram_module_v10 v10;
+ struct atom_vram_module_v11 v11;
+ struct atom_vram_module_v3_0 v30;
+};
+
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+ int atom_mem_type)
+{
+ int vram_type;
+
+ if (adev->flags & AMD_IS_APU) {
+ switch (atom_mem_type) {
+ case Ddr2MemType:
+ case LpDdr2MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR2;
+ break;
+ case Ddr3MemType:
+ case LpDdr3MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR3;
+ break;
+ case Ddr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ break;
+ case LpDdr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+ break;
+ case Ddr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR5;
+ break;
+ case LpDdr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ } else {
+ switch (atom_mem_type) {
+ case ATOM_DGPU_VRAM_TYPE_GDDR5:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_HBM2:
+ case ATOM_DGPU_VRAM_TYPE_HBM2E:
+ case ATOM_DGPU_VRAM_TYPE_HBM3:
+ vram_type = AMDGPU_VRAM_TYPE_HBM;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_GDDR6:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR6;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return vram_type;
+}
+
+
+int
+amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index, i = 0;
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ union vram_info *vram_info;
+ union vram_module *vram_module;
+ u8 frev, crev;
+ u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+ u32 module_id;
+
+ if (adev->flags & AMD_IS_APU)
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ integratedsysteminfo);
+ else
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_info);
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size,
+ &frev, &crev, &data_offset)) {
+ if (adev->flags & AMD_IS_APU) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 11:
+ case 12:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v11.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case 2:
+ switch (crev) {
+ case 1:
+ case 2:
+ mem_channel_number = igp_info->v21.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v21.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ /* invalid frev */
+ return -EINVAL;
+ }
+ }
+
+ }
+
+ return 0;
+}
+
+/*
+ * Return true if vbios enabled ecc by default, if umc info table is available
+ * or false if ecc is not enabled or umc info table is not available
+ */
+bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union umc_info *umc_info;
+ u8 frev, crev;
+ bool ecc_default_enabled = false;
+ u8 umc_config;
+ u32 umc_config1;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ umc_info);
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size, &frev, &crev, &data_offset)) {
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+ if (frev == 3) {
+ switch (crev) {
+ case 1:
+ umc_config = le32_to_cpu(umc_info->v31.umc_config);
+ ecc_default_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 2:
+ umc_config = le32_to_cpu(umc_info->v32.umc_config);
+ ecc_default_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 3:
+ umc_config = le32_to_cpu(umc_info->v33.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
+ ecc_default_enabled =
+ ((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else if (frev == 4) {
+ switch (crev) {
+ case 0:
+ umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+ ecc_default_enabled =
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else {
+ /* unsupported frev */
+ return false;
+ }
+ }
+
+ return ecc_default_enabled;
+}
+
+/*
+ * Helper function to query sram ecc capablity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if vbios supports sram ecc or false if not
+ */
+bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false;
+}
+
+/*
+ * Helper function to query dynamic boot config capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if vbios supports dynamic boot config or false if not
+ */
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
+}
+
+/**
+ * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS
+ * @adev: amdgpu_device pointer
+ * @i2c_address: pointer to u8; if not NULL, will contain
+ * the RAS EEPROM address if the function returns true
+ *
+ * Return true if VBIOS supports RAS EEPROM address reporting,
+ * else return false. If true and @i2c_address is not NULL,
+ * will contain the RAS ROM address.
+ */
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
+ u8 *i2c_address)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union firmware_info *firmware_info;
+ u8 frev, crev;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
+ index, &size, &frev, &crev,
+ &data_offset)) {
+ /* support firmware_info 3.4 + */
+ if ((frev == 3 && crev >= 4) || (frev > 3)) {
+ firmware_info = (union firmware_info *)
+ (mode_info->atom_context->bios + data_offset);
+ /* The ras_rom_i2c_slave_addr should ideally
+ * be a 19-bit EEPROM address, which would be
+ * used as is by the driver; see top of
+ * amdgpu_eeprom.c.
+ *
+ * When this is the case, 0 is of course a
+ * valid RAS EEPROM address, in which case,
+ * we'll drop the first "if (firm...)" and only
+ * leave the check for the pointer.
+ *
+ * The reason this works right now is because
+ * ras_rom_i2c_slave_addr contains the EEPROM
+ * device type qualifier 1010b in the top 4
+ * bits.
+ */
+ if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+ if (i2c_address)
+ *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+
+union smu_info {
+ struct atom_smu_info_v3_1 v31;
+ struct atom_smu_info_v4_0 v40;
+};
+
+union gfx_info {
+ struct atom_gfx_info_v2_2 v22;
+ struct atom_gfx_info_v2_4 v24;
+ struct atom_gfx_info_v2_7 v27;
+ struct atom_gfx_info_v3_0 v30;
+};
+
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct amdgpu_pll *spll = &adev->clock.spll;
+ struct amdgpu_pll *mpll = &adev->clock.mpll;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL, index;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ adev->clock.default_sclk =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ adev->clock.default_mclk =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+
+ adev->pm.current_sclk = adev->clock.default_sclk;
+ adev->pm.current_mclk = adev->clock.default_mclk;
+
+ ret = 0;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ smu_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union smu_info *smu_info =
+ (union smu_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ /* system clock */
+ if (frev == 3)
+ spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ else if (frev == 4)
+ spll->reference_freq = le32_to_cpu(smu_info->v40.core_refclk_10khz);
+
+ spll->reference_div = 0;
+ spll->min_post_div = 1;
+ spll->max_post_div = 1;
+ spll->min_ref_div = 2;
+ spll->max_ref_div = 0xff;
+ spll->min_feedback_div = 4;
+ spll->max_feedback_div = 0xff;
+ spll->best_vco = 0;
+
+ ret = 0;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ umc_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union umc_info *umc_info =
+ (union umc_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ /* memory clock */
+ mpll->reference_freq = le32_to_cpu(umc_info->v31.mem_refclk_10khz);
+
+ mpll->reference_div = 0;
+ mpll->min_post_div = 1;
+ mpll->max_post_div = 1;
+ mpll->min_ref_div = 2;
+ mpll->max_ref_div = 0xff;
+ mpll->min_feedback_div = 4;
+ mpll->max_feedback_div = 0xff;
+ mpll->best_vco = 0;
+
+ ret = 0;
+ }
+
+ /* if asic is Navi+, the rlc reference clock is used for system clock
+ * from vbios gfx_info table */
+ if (adev->asic_type >= CHIP_NAVI10) {
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ if ((frev == 3) ||
+ (frev == 2 && crev == 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk);
+ ret = 0;
+ } else if ((frev == 2) &&
+ (crev >= 2) &&
+ (crev != 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk);
+ ret = 0;
+ } else {
+ BUG();
+ }
+ }
+ }
+
+ return ret;
+}
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ if (frev == 2) {
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ case 7:
+ adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 3) {
+ switch (crev) {
+ case 0:
+ adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ }
+ return -EINVAL;
+}
+
+/*
+ * Helper function to query two stage mem training capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if two stage mem training is supported or false if not
+ */
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) ? true : false;
+}
+
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ union firmware_info *firmware_info;
+ int index;
+ u16 data_offset, size;
+ u8 frev, crev;
+ int fw_reserved_fb_size;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size,
+ &frev, &crev, &data_offset))
+ /* fail to parse data_header */
+ return 0;
+
+ firmware_info = (union firmware_info *)(ctx->bios + data_offset);
+
+ if (frev != 3)
+ return -EINVAL;
+
+ switch (crev) {
+ case 4:
+ fw_reserved_fb_size =
+ (firmware_info->v34.fw_reserved_size_in_kb << 10);
+ break;
+ default:
+ fw_reserved_fb_size = 0;
+ break;
+ }
+
+ return fw_reserved_fb_size;
+}
+
+/*
+ * Helper function to execute asic_init table
+ *
+ * @adev: amdgpu_device pointer
+ * @fb_reset: flag to indicate whether fb is reset or not
+ *
+ * Return 0 if succeed, otherwise failed
+ */
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz;
+ struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1;
+ int index;
+
+ if (!mode_info)
+ return -EINVAL;
+
+ ctx = mode_info->atom_context;
+ if (!ctx)
+ return -EINVAL;
+
+ /* query bootup sclk/mclk from firmware_info table */
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(ctx->bios +
+ data_offset);
+
+ bootup_sclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ bootup_mclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+ } else {
+ return -EINVAL;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
+ asic_init);
+ if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
+ if (frev == 2 && crev >= 1) {
+ memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
+ asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz;
+ asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz;
+ asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT;
+ if (!fb_reset)
+ asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT;
+ else
+ asic_init_ps_v2_1.param.memparam.memflag = 0;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
new file mode 100644
index 000000000..c7eb2caec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ATOMFIRMWARE_H__
+#define __AMDGPU_ATOMFIRMWARE_H__
+
+#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
+
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address);
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
new file mode 100644
index 000000000..375f02002
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * ATPX support for both Intel/ATI
+ */
+#include <linux/vga_switcheroo.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "amdgpu.h"
+#include "amd_acpi.h"
+
+#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0)
+
+struct amdgpu_px_quirk {
+ u32 chip_vendor;
+ u32 chip_device;
+ u32 subsys_vendor;
+ u32 subsys_device;
+ u32 px_quirk_flags;
+};
+
+struct amdgpu_atpx_functions {
+ bool px_params;
+ bool power_cntl;
+ bool disp_mux_cntl;
+ bool i2c_mux_cntl;
+ bool switch_start;
+ bool switch_end;
+ bool disp_connectors_mapping;
+ bool disp_detection_ports;
+};
+
+struct amdgpu_atpx {
+ acpi_handle handle;
+ struct amdgpu_atpx_functions functions;
+ bool is_hybrid;
+ bool dgpu_req_power_for_displays;
+};
+
+static struct amdgpu_atpx_priv {
+ bool atpx_detected;
+ bool bridge_pm_usable;
+ unsigned int quirks;
+ /* handle for device - and atpx */
+ acpi_handle dhandle;
+ acpi_handle other_handle;
+ struct amdgpu_atpx atpx;
+} amdgpu_atpx_priv;
+
+struct atpx_verify_interface {
+ u16 size; /* structure size in bytes (includes size field) */
+ u16 version; /* version */
+ u32 function_bits; /* supported functions bit vector */
+} __packed;
+
+struct atpx_px_params {
+ u16 size; /* structure size in bytes (includes size field) */
+ u32 valid_flags; /* which flags are valid */
+ u32 flags; /* flags */
+} __packed;
+
+struct atpx_power_control {
+ u16 size;
+ u8 dgpu_state;
+} __packed;
+
+struct atpx_mux {
+ u16 size;
+ u16 mux;
+} __packed;
+
+bool amdgpu_has_atpx(void)
+{
+ return amdgpu_atpx_priv.atpx_detected;
+}
+
+bool amdgpu_has_atpx_dgpu_power_cntl(void)
+{
+ return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void)
+{
+ return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
+bool amdgpu_atpx_dgpu_req_power_for_displays(void)
+{
+ return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
+}
+
+#if defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void)
+{
+ return amdgpu_atpx_priv.dhandle;
+}
+#endif
+
+/**
+ * amdgpu_atpx_call - call an ATPX method
+ *
+ * @handle: acpi handle
+ * @function: the ATPX function to execute
+ * @params: ATPX function params
+ *
+ * Executes the requested ATPX function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object atpx_arg_elements[2];
+ struct acpi_object_list atpx_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atpx_arg.count = 2;
+ atpx_arg.pointer = &atpx_arg_elements[0];
+
+ atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atpx_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atpx_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atpx_arg_elements[1].buffer.length = params->length;
+ atpx_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atpx_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atpx_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+
+ /* Fail only if calling the method fails and ATPX is supported */
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ pr_err("failed to evaluate ATPX got %s\n",
+ acpi_format_exception(status));
+ kfree(buffer.pointer);
+ return NULL;
+ }
+
+ return buffer.pointer;
+}
+
+/**
+ * amdgpu_atpx_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATPX
+ *
+ * Use the supported functions mask from ATPX function
+ * ATPX_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mask)
+{
+ f->px_params = mask & ATPX_GET_PX_PARAMETERS_SUPPORTED;
+ f->power_cntl = mask & ATPX_POWER_CONTROL_SUPPORTED;
+ f->disp_mux_cntl = mask & ATPX_DISPLAY_MUX_CONTROL_SUPPORTED;
+ f->i2c_mux_cntl = mask & ATPX_I2C_MUX_CONTROL_SUPPORTED;
+ f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
+ f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
+ f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
+ f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
+}
+
+/**
+ * amdgpu_atpx_validate - validate ATPX functions
+ *
+ * @atpx: amdgpu atpx struct
+ *
+ * Validate that required functions are enabled (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
+{
+ u32 valid_bits = 0;
+
+ if (atpx->functions.px_params) {
+ union acpi_object *info;
+ struct atpx_px_params output;
+ size_t size;
+
+ info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ pr_err("ATPX buffer is too small: %zu\n", size);
+ kfree(info);
+ return -EINVAL;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ valid_bits = output.flags & output.valid_flags;
+
+ kfree(info);
+ }
+
+ /* if separate mux flag is set, mux controls are required */
+ if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+ atpx->functions.i2c_mux_cntl = true;
+ atpx->functions.disp_mux_cntl = true;
+ }
+ /* if any outputs are muxed, mux controls are required */
+ if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+ ATPX_TV_SIGNAL_MUXED |
+ ATPX_DFP_SIGNAL_MUXED))
+ atpx->functions.disp_mux_cntl = true;
+
+
+ /* some bioses set these bits rather than flagging power_cntl as supported */
+ if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+ ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+ atpx->functions.power_cntl = true;
+
+ atpx->is_hybrid = false;
+ if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+ if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
+ pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n");
+ atpx->functions.power_cntl = true;
+ atpx->is_hybrid = false;
+ } else {
+ pr_notice("ATPX Hybrid Graphics\n");
+ /*
+ * Disable legacy PM methods only when pcie port PM is usable,
+ * otherwise the device might fail to power off or power on.
+ */
+ atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
+ atpx->is_hybrid = true;
+ }
+ }
+
+ atpx->dgpu_req_power_for_displays = false;
+ if (valid_bits & ATPX_DGPU_REQ_POWER_FOR_DISPLAYS)
+ atpx->dgpu_req_power_for_displays = true;
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_verify_interface - verify ATPX
+ *
+ * @atpx: amdgpu atpx struct
+ *
+ * Execute the ATPX_FUNCTION_VERIFY_INTERFACE ATPX function
+ * to initialize ATPX and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
+{
+ union acpi_object *info;
+ struct atpx_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 8) {
+ pr_err("ATPX buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ pr_notice("ATPX version %u, functions 0x%08x\n",
+ output.version, output.function_bits);
+
+ amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atpx_set_discrete_state - power up/down discrete GPU
+ *
+ * @atpx: atpx info struct
+ * @state: discrete GPU state (0 = power down, 1 = power up)
+ *
+ * Execute the ATPX_FUNCTION_POWER_CONTROL ATPX function to
+ * power down/up the discrete GPU (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_power_control input;
+
+ if (atpx->functions.power_cntl) {
+ input.size = 3;
+ input.dgpu_state = state;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_POWER_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+
+ /* 200ms delay is required after off */
+ if (state == 0)
+ msleep(200);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_disp_mux - switch display mux
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_DISPLAY_MUX_CONTROL ATPX function to
+ * switch the display mux between the discrete GPU and integrated GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_disp_mux(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.disp_mux_cntl) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_DISPLAY_MUX_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_i2c_mux - switch i2c/hpd mux
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_I2C_MUX_CONTROL ATPX function to
+ * switch the i2c/hpd mux between the discrete GPU and integrated GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_i2c_mux(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.i2c_mux_cntl) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_I2C_MUX_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_start - notify the sbios of a GPU switch
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION ATPX
+ * function to notify the sbios that a switch between the discrete GPU and
+ * integrated GPU has begun (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_start(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.switch_start) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_end - notify the sbios of a GPU switch
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION ATPX
+ * function to notify the sbios that a switch between the discrete GPU and
+ * integrated GPU has ended (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_end(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.switch_end) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switchto - switch to the requested GPU
+ *
+ * @id: GPU to switch to
+ *
+ * Execute the necessary ATPX functions to switch between the discrete GPU and
+ * integrated GPU (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switchto(enum vga_switcheroo_client_id id)
+{
+ u16 gpu_id;
+
+ if (id == VGA_SWITCHEROO_IGD)
+ gpu_id = ATPX_INTEGRATED_GPU;
+ else
+ gpu_id = ATPX_DISCRETE_GPU;
+
+ amdgpu_atpx_switch_start(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_disp_mux(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_i2c_mux(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_end(&amdgpu_atpx_priv.atpx, gpu_id);
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_power_state - power down/up the requested GPU
+ *
+ * @id: GPU to power down/up
+ * @state: requested power state (0 = off, 1 = on)
+ *
+ * Execute the necessary ATPX function to power down/up the discrete GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_power_state(enum vga_switcheroo_client_id id,
+ enum vga_switcheroo_state state)
+{
+ /* on w500 ACPI can't change intel gpu state */
+ if (id == VGA_SWITCHEROO_IGD)
+ return 0;
+
+ amdgpu_atpx_set_discrete_state(&amdgpu_atpx_priv.atpx, state);
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_pci_probe_handle - look up the ATPX handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATPX handles (all asics).
+ * Returns true if the handles are found, false if not.
+ */
+static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev)
+{
+ acpi_handle dhandle, atpx_handle;
+ acpi_status status;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+ if (ACPI_FAILURE(status)) {
+ amdgpu_atpx_priv.other_handle = dhandle;
+ return false;
+ }
+ amdgpu_atpx_priv.dhandle = dhandle;
+ amdgpu_atpx_priv.atpx.handle = atpx_handle;
+ return true;
+}
+
+/**
+ * amdgpu_atpx_init - verify the ATPX interface
+ *
+ * Verify the ATPX interface (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_init(void)
+{
+ int r;
+
+ /* set up the ATPX handle */
+ r = amdgpu_atpx_verify_interface(&amdgpu_atpx_priv.atpx);
+ if (r)
+ return r;
+
+ /* validate the atpx setup */
+ r = amdgpu_atpx_validate(&amdgpu_atpx_priv.atpx);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_get_client_id - get the client id
+ *
+ * @pdev: pci device
+ *
+ * look up whether we are the integrated or discrete GPU (all asics).
+ * Returns the client id.
+ */
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+{
+ if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
+ return VGA_SWITCHEROO_IGD;
+ else
+ return VGA_SWITCHEROO_DIS;
+}
+
+static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
+ .switchto = amdgpu_atpx_switchto,
+ .power_state = amdgpu_atpx_power_state,
+ .get_client_id = amdgpu_atpx_get_client_id,
+};
+
+static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
+ /* HG _PR3 doesn't seem to work on this A+A weston board */
+ { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0, 0, 0, 0, 0 },
+};
+
+static void amdgpu_atpx_get_quirks(struct pci_dev *pdev)
+{
+ const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list;
+
+ /* Apply PX quirks */
+ while (p && p->chip_device != 0) {
+ if (pdev->vendor == p->chip_vendor &&
+ pdev->device == p->chip_device &&
+ pdev->subsystem_vendor == p->subsys_vendor &&
+ pdev->subsystem_device == p->subsys_device) {
+ amdgpu_atpx_priv.quirks |= p->px_quirk_flags;
+ break;
+ }
+ ++p;
+ }
+}
+
+/**
+ * amdgpu_atpx_detect - detect whether we have PX
+ *
+ * Check if we have a PX system (all asics).
+ * Returns true if we have a PX system, false if not.
+ */
+static bool amdgpu_atpx_detect(void)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+ struct pci_dev *pdev = NULL;
+ bool has_atpx = false;
+ int vga_count = 0;
+ bool d3_supported = false;
+ struct pci_dev *parent_pdev;
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ vga_count++;
+
+ has_atpx |= amdgpu_atpx_pci_probe_handle(pdev);
+
+ parent_pdev = pci_upstream_bridge(pdev);
+ d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+ amdgpu_atpx_get_quirks(pdev);
+ }
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
+ vga_count++;
+
+ has_atpx |= amdgpu_atpx_pci_probe_handle(pdev);
+
+ parent_pdev = pci_upstream_bridge(pdev);
+ d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+ amdgpu_atpx_get_quirks(pdev);
+ }
+
+ if (has_atpx && vga_count == 2) {
+ acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer);
+ pr_info("vga_switcheroo: detected switching method %s handle\n",
+ acpi_method_name);
+ amdgpu_atpx_priv.atpx_detected = true;
+ amdgpu_atpx_priv.bridge_pm_usable = d3_supported;
+ amdgpu_atpx_init();
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_register_atpx_handler - register with vga_switcheroo
+ *
+ * Register the PX callbacks with vga_switcheroo (all asics).
+ */
+void amdgpu_register_atpx_handler(void)
+{
+ bool r;
+ enum vga_switcheroo_handler_flags_t handler_flags = 0;
+
+ /* detect if we have any ATPX + 2 VGA in the system */
+ r = amdgpu_atpx_detect();
+ if (!r)
+ return;
+
+ vga_switcheroo_register_handler(&amdgpu_atpx_handler, handler_flags);
+}
+
+/**
+ * amdgpu_unregister_atpx_handler - unregister with vga_switcheroo
+ *
+ * Unregister the PX callbacks with vga_switcheroo (all asics).
+ */
+void amdgpu_unregister_atpx_handler(void)
+{
+ vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
new file mode 100644
index 000000000..edc6377ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Jerome Glisse
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+
+#define AMDGPU_BENCHMARK_ITERATIONS 1024
+#define AMDGPU_BENCHMARK_COMMON_MODES_N 17
+
+static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
+ uint64_t saddr, uint64_t daddr, int n, s64 *time_ms)
+{
+ ktime_t stime, etime;
+ struct dma_fence *fence;
+ int i, r;
+
+ stime = ktime_get();
+ for (i = 0; i < n; i++) {
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
+ false, false, false);
+ if (r)
+ goto exit_do_move;
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ if (r)
+ goto exit_do_move;
+ }
+
+exit_do_move:
+ etime = ktime_get();
+ *time_ms = ktime_ms_delta(etime, stime);
+
+ return r;
+}
+
+
+static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
+ int n, unsigned size,
+ s64 time_ms,
+ unsigned sdomain, unsigned ddomain,
+ char *kind)
+{
+ s64 throughput = (n * (size >> 10));
+
+ throughput = div64_s64(throughput, time_ms);
+
+ dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
+ " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
+ kind, n, size >> 10, sdomain, ddomain, time_ms,
+ throughput * 8, throughput);
+}
+
+static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
+ unsigned sdomain, unsigned ddomain)
+{
+ struct amdgpu_bo *dobj = NULL;
+ struct amdgpu_bo *sobj = NULL;
+ uint64_t saddr, daddr;
+ s64 time_ms;
+ int r, n;
+
+ n = AMDGPU_BENCHMARK_ITERATIONS;
+
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, sdomain,
+ &sobj,
+ &saddr,
+ NULL);
+ if (r)
+ goto out_cleanup;
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, ddomain,
+ &dobj,
+ &daddr,
+ NULL);
+ if (r)
+ goto out_cleanup;
+
+ if (adev->mman.buffer_funcs) {
+ r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms);
+ if (r)
+ goto out_cleanup;
+ else
+ amdgpu_benchmark_log_results(adev, n, size, time_ms,
+ sdomain, ddomain, "dma");
+ }
+
+out_cleanup:
+ /* Check error value now. The value can be overwritten when clean up.*/
+ if (r < 0)
+ dev_info(adev->dev, "Error while benchmarking BO move.\n");
+
+ if (sobj)
+ amdgpu_bo_free_kernel(&sobj, &saddr, NULL);
+ if (dobj)
+ amdgpu_bo_free_kernel(&dobj, &daddr, NULL);
+ return r;
+}
+
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
+{
+ int i, r;
+ static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
+ 640 * 480 * 4,
+ 720 * 480 * 4,
+ 800 * 600 * 4,
+ 848 * 480 * 4,
+ 1024 * 768 * 4,
+ 1152 * 768 * 4,
+ 1280 * 720 * 4,
+ 1280 * 800 * 4,
+ 1280 * 854 * 4,
+ 1280 * 960 * 4,
+ 1280 * 1024 * 4,
+ 1440 * 900 * 4,
+ 1400 * 1050 * 4,
+ 1680 * 1050 * 4,
+ 1600 * 1200 * 4,
+ 1920 * 1080 * 4,
+ 1920 * 1200 * 4
+ };
+
+ mutex_lock(&adev->benchmark_mutex);
+ switch (test_number) {
+ case 1:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
+ test_number);
+ /* simple test, VRAM to GTT and GTT to VRAM */
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ break;
+ case 2:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to VRAM)\n",
+ test_number);
+ /* simple test, VRAM to VRAM */
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ break;
+ case 3:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* GTT to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 4:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* VRAM to GTT, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
+ break;
+ case 5:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* VRAM to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 6:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
+ test_number);
+ /* GTT to VRAM, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 7:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
+ test_number);
+ /* VRAM to GTT, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
+ break;
+ case 8:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
+ test_number);
+ /* VRAM to VRAM, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+
+ default:
+ dev_info(adev->dev, "Unknown benchmark %d\n", test_number);
+ r = -EINVAL;
+ break;
+ }
+
+done:
+ mutex_unlock(&adev->benchmark_mutex);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
new file mode 100644
index 000000000..f3a09ecb7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include "amdgpu.h"
+#include "atom.h"
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+/*
+ * BIOS.
+ */
+
+#define AMD_VBIOS_SIGNATURE " 761295520"
+#define AMD_VBIOS_SIGNATURE_OFFSET 0x30
+#define AMD_VBIOS_SIGNATURE_SIZE sizeof(AMD_VBIOS_SIGNATURE)
+#define AMD_VBIOS_SIGNATURE_END (AMD_VBIOS_SIGNATURE_OFFSET + AMD_VBIOS_SIGNATURE_SIZE)
+#define AMD_IS_VALID_VBIOS(p) ((p)[0] == 0x55 && (p)[1] == 0xAA)
+#define AMD_VBIOS_LENGTH(p) ((p)[2] << 9)
+
+/* Check if current bios is an ATOM BIOS.
+ * Return true if it is ATOM BIOS. Otherwise, return false.
+ */
+static bool check_atom_bios(uint8_t *bios, size_t size)
+{
+ uint16_t tmp, bios_header_start;
+
+ if (!bios || size < 0x49) {
+ DRM_INFO("vbios mem is null or mem size is wrong\n");
+ return false;
+ }
+
+ if (!AMD_IS_VALID_VBIOS(bios)) {
+ DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ return false;
+ }
+
+ bios_header_start = bios[0x48] | (bios[0x49] << 8);
+ if (!bios_header_start) {
+ DRM_INFO("Can't locate bios header\n");
+ return false;
+ }
+
+ tmp = bios_header_start + 4;
+ if (size < tmp) {
+ DRM_INFO("BIOS header is broken\n");
+ return false;
+ }
+
+ if (!memcmp(bios + tmp, "ATOM", 4) ||
+ !memcmp(bios + tmp, "MOTA", 4)) {
+ DRM_DEBUG("ATOMBIOS detected\n");
+ return true;
+ }
+
+ return false;
+}
+
+/* If you boot an IGP board with a discrete card as the primary,
+ * the IGP rom is not accessible via the rom bar as the IGP rom is
+ * part of the system bios. On boot, the system bios puts a
+ * copy of the igp rom at the start of vram if a discrete card is
+ * present.
+ */
+static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
+{
+ uint8_t __iomem *bios;
+ resource_size_t vram_base;
+ resource_size_t size = 256 * 1024; /* ??? */
+
+ if (!(adev->flags & AMD_IS_APU))
+ if (amdgpu_device_need_post(adev))
+ return false;
+
+ /* FB BAR not enabled */
+ if (pci_resource_len(adev->pdev, 0) == 0)
+ return false;
+
+ adev->bios = NULL;
+ vram_base = pci_resource_start(adev->pdev, 0);
+ bios = ioremap_wc(vram_base, size);
+ if (!bios)
+ return false;
+
+ adev->bios = kmalloc(size, GFP_KERNEL);
+ if (!adev->bios) {
+ iounmap(bios);
+ return false;
+ }
+ adev->bios_size = size;
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
+
+ if (!check_atom_bios(adev->bios, size)) {
+ kfree(adev->bios);
+ return false;
+ }
+
+ return true;
+}
+
+bool amdgpu_read_bios(struct amdgpu_device *adev)
+{
+ uint8_t __iomem *bios;
+ size_t size;
+
+ adev->bios = NULL;
+ /* XXX: some cards may return 0 for rom size? ddx has a workaround */
+ bios = pci_map_rom(adev->pdev, &size);
+ if (!bios)
+ return false;
+
+ adev->bios = kzalloc(size, GFP_KERNEL);
+ if (adev->bios == NULL) {
+ pci_unmap_rom(adev->pdev, bios);
+ return false;
+ }
+ adev->bios_size = size;
+ memcpy_fromio(adev->bios, bios, size);
+ pci_unmap_rom(adev->pdev, bios);
+
+ if (!check_atom_bios(adev->bios, size)) {
+ kfree(adev->bios);
+ return false;
+ }
+
+ return true;
+}
+
+static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
+{
+ u8 header[AMD_VBIOS_SIGNATURE_END+1] = {0};
+ int len;
+
+ if (!adev->asic_funcs || !adev->asic_funcs->read_bios_from_rom)
+ return false;
+
+ /* validate VBIOS signature */
+ if (amdgpu_asic_read_bios_from_rom(adev, &header[0], sizeof(header)) == false)
+ return false;
+ header[AMD_VBIOS_SIGNATURE_END] = 0;
+
+ if ((!AMD_IS_VALID_VBIOS(header)) ||
+ memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
+ AMD_VBIOS_SIGNATURE,
+ strlen(AMD_VBIOS_SIGNATURE)) != 0)
+ return false;
+
+ /* valid vbios, go on */
+ len = AMD_VBIOS_LENGTH(header);
+ len = ALIGN(len, 4);
+ adev->bios = kmalloc(len, GFP_KERNEL);
+ if (!adev->bios) {
+ DRM_ERROR("no memory to allocate for BIOS\n");
+ return false;
+ }
+ adev->bios_size = len;
+
+ /* read complete BIOS */
+ amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
+
+ if (!check_atom_bios(adev->bios, len)) {
+ kfree(adev->bios);
+ return false;
+ }
+
+ return true;
+}
+
+static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
+{
+ phys_addr_t rom = adev->pdev->rom;
+ size_t romlen = adev->pdev->romlen;
+ void __iomem *bios;
+
+ adev->bios = NULL;
+
+ if (!rom || romlen == 0)
+ return false;
+
+ adev->bios = kzalloc(romlen, GFP_KERNEL);
+ if (!adev->bios)
+ return false;
+
+ bios = ioremap(rom, romlen);
+ if (!bios)
+ goto free_bios;
+
+ memcpy_fromio(adev->bios, bios, romlen);
+ iounmap(bios);
+
+ if (!check_atom_bios(adev->bios, romlen))
+ goto free_bios;
+
+ adev->bios_size = romlen;
+
+ return true;
+free_bios:
+ kfree(adev->bios);
+ return false;
+}
+
+#ifdef CONFIG_ACPI
+/* ATRM is used to get the BIOS on the discrete cards in
+ * dual-gpu systems.
+ */
+/* retrieve the ROM in 4k blocks */
+#define ATRM_BIOS_PAGE 4096
+/**
+ * amdgpu_atrm_call - fetch a chunk of the vbios
+ *
+ * @atrm_handle: acpi ATRM handle
+ * @bios: vbios image pointer
+ * @offset: offset of vbios image data to fetch
+ * @len: length of vbios image data to fetch
+ *
+ * Executes ATRM to fetch a chunk of the discrete
+ * vbios image on PX systems (all asics).
+ * Returns the length of the buffer fetched.
+ */
+static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
+ int offset, int len)
+{
+ acpi_status status;
+ union acpi_object atrm_arg_elements[2], *obj;
+ struct acpi_object_list atrm_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
+
+ atrm_arg.count = 2;
+ atrm_arg.pointer = &atrm_arg_elements[0];
+
+ atrm_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atrm_arg_elements[0].integer.value = offset;
+
+ atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atrm_arg_elements[1].integer.value = len;
+
+ status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
+ if (ACPI_FAILURE(status)) {
+ DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+ return -ENODEV;
+ }
+
+ obj = (union acpi_object *)buffer.pointer;
+ memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length);
+ len = obj->buffer.length;
+ kfree(buffer.pointer);
+ return len;
+}
+
+static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
+{
+ int ret;
+ int size = 256 * 1024;
+ int i;
+ struct pci_dev *pdev = NULL;
+ acpi_handle dhandle, atrm_handle;
+ acpi_status status;
+ bool found = false;
+
+ /* ATRM is for the discrete card only */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
+ return false;
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ continue;
+
+ status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
+ if (ACPI_SUCCESS(status)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ continue;
+
+ status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
+ if (ACPI_SUCCESS(status)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ if (!found)
+ return false;
+ pci_dev_put(pdev);
+
+ adev->bios = kmalloc(size, GFP_KERNEL);
+ if (!adev->bios) {
+ dev_err(adev->dev, "Unable to allocate bios\n");
+ return false;
+ }
+
+ for (i = 0; i < size / ATRM_BIOS_PAGE; i++) {
+ ret = amdgpu_atrm_call(atrm_handle,
+ adev->bios,
+ (i * ATRM_BIOS_PAGE),
+ ATRM_BIOS_PAGE);
+ if (ret < ATRM_BIOS_PAGE)
+ break;
+ }
+
+ if (!check_atom_bios(adev->bios, size)) {
+ kfree(adev->bios);
+ return false;
+ }
+ adev->bios_size = size;
+ return true;
+}
+#else
+static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
+{
+ return false;
+}
+#endif
+
+static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return igp_read_bios_from_vram(adev);
+ else
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
+}
+
+#ifdef CONFIG_ACPI
+static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
+{
+ struct acpi_table_header *hdr;
+ acpi_size tbl_size;
+ UEFI_ACPI_VFCT *vfct;
+ unsigned int offset;
+
+ if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
+ return false;
+ tbl_size = hdr->length;
+ if (tbl_size < sizeof(UEFI_ACPI_VFCT)) {
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #1),skipping\n");
+ return false;
+ }
+
+ vfct = (UEFI_ACPI_VFCT *)hdr;
+ offset = vfct->VBIOSImageOffset;
+
+ while (offset < tbl_size) {
+ GOP_VBIOS_CONTENT *vbios = (GOP_VBIOS_CONTENT *)((char *)hdr + offset);
+ VFCT_IMAGE_HEADER *vhdr = &vbios->VbiosHeader;
+
+ offset += sizeof(VFCT_IMAGE_HEADER);
+ if (offset > tbl_size) {
+ dev_info(adev->dev, "ACPI VFCT image header truncated,skipping\n");
+ return false;
+ }
+
+ offset += vhdr->ImageLength;
+ if (offset > tbl_size) {
+ dev_info(adev->dev, "ACPI VFCT image truncated,skipping\n");
+ return false;
+ }
+
+ if (vhdr->ImageLength &&
+ vhdr->PCIBus == adev->pdev->bus->number &&
+ vhdr->PCIDevice == PCI_SLOT(adev->pdev->devfn) &&
+ vhdr->PCIFunction == PCI_FUNC(adev->pdev->devfn) &&
+ vhdr->VendorID == adev->pdev->vendor &&
+ vhdr->DeviceID == adev->pdev->device) {
+ adev->bios = kmemdup(&vbios->VbiosContent,
+ vhdr->ImageLength,
+ GFP_KERNEL);
+
+ if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
+ kfree(adev->bios);
+ return false;
+ }
+ adev->bios_size = vhdr->ImageLength;
+ return true;
+ }
+ }
+
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #2),skipping\n");
+ return false;
+}
+#else
+static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
+{
+ return false;
+}
+#endif
+
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ if (amdgpu_atrm_get_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
+ goto success;
+ }
+
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (igp_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_rom(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM\n");
+ goto success;
+ }
+
+ if (amdgpu_read_disabled_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from disabled ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+ return true;
+}
+
+/* helper function for soc15 and onwards to read bios from rom */
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ u32 i, length_dw;
+ u32 rom_offset;
+ u32 rom_index_offset;
+ u32 rom_data_offset;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!adev->smuio.funcs ||
+ !adev->smuio.funcs->get_rom_index_offset ||
+ !adev->smuio.funcs->get_rom_data_offset)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ rom_index_offset =
+ adev->smuio.funcs->get_rom_index_offset(adev);
+ rom_data_offset =
+ adev->smuio.funcs->get_rom_data_offset(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_rom_offset) {
+ rom_offset = adev->nbio.funcs->get_rom_offset(adev);
+ rom_offset = rom_offset << 17;
+ } else {
+ rom_offset = 0;
+ }
+
+ /* set rom index to rom_offset */
+ WREG32(rom_index_offset, rom_offset);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(rom_data_offset);
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
new file mode 100644
index 000000000..9a53ca555
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/sort.h>
+#include <linux/uaccess.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
+#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
+
+static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
+{
+ struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
+ rhead);
+ mutex_destroy(&list->bo_list_mutex);
+ kvfree(list);
+}
+
+static void amdgpu_bo_list_free(struct kref *ref)
+{
+ struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+ refcount);
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, list)
+ amdgpu_bo_unref(&e->bo);
+ call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
+
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+ const struct amdgpu_bo_list_entry *a = _a, *b = _b;
+
+ if (a->priority > b->priority)
+ return 1;
+ if (a->priority < b->priority)
+ return -1;
+ return 0;
+}
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ size_t num_entries, struct amdgpu_bo_list **result)
+{
+ unsigned last_entry = 0, first_userptr = num_entries;
+ struct amdgpu_bo_list_entry *array;
+ struct amdgpu_bo_list *list;
+ uint64_t total_size = 0;
+ size_t size;
+ unsigned i;
+ int r;
+
+ if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
+ / sizeof(struct amdgpu_bo_list_entry))
+ return -EINVAL;
+
+ size = sizeof(struct amdgpu_bo_list);
+ size += num_entries * sizeof(struct amdgpu_bo_list_entry);
+ list = kvmalloc(size, GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ kref_init(&list->refcount);
+ list->gds_obj = NULL;
+ list->gws_obj = NULL;
+ list->oa_obj = NULL;
+
+ array = amdgpu_bo_list_array_entry(list, 0);
+ memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
+
+ for (i = 0; i < num_entries; ++i) {
+ struct amdgpu_bo_list_entry *entry;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ struct mm_struct *usermm;
+
+ gobj = drm_gem_object_lookup(filp, info[i].bo_handle);
+ if (!gobj) {
+ r = -ENOENT;
+ goto error_free;
+ }
+
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+ if (usermm) {
+ if (usermm != current->mm) {
+ amdgpu_bo_unref(&bo);
+ r = -EPERM;
+ goto error_free;
+ }
+ entry = &array[--first_userptr];
+ } else {
+ entry = &array[last_entry++];
+ }
+
+ entry->priority = min(info[i].bo_priority,
+ AMDGPU_BO_LIST_MAX_PRIORITY);
+ entry->bo = bo;
+
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
+ list->gds_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
+ list->gws_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
+ list->oa_obj = bo;
+
+ total_size += amdgpu_bo_size(bo);
+ trace_amdgpu_bo_list_set(list, bo);
+ }
+
+ list->first_userptr = first_userptr;
+ list->num_entries = num_entries;
+ sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+ amdgpu_bo_list_entry_cmp, NULL);
+
+ trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+
+ mutex_init(&list->bo_list_mutex);
+ *result = list;
+ return 0;
+
+error_free:
+ for (i = 0; i < last_entry; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ for (i = first_userptr; i < num_entries; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ kvfree(list);
+ return r;
+
+}
+
+static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
+{
+ struct amdgpu_bo_list *list;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ list = idr_remove(&fpriv->bo_list_handles, id);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (list)
+ kref_put(&list->refcount, amdgpu_bo_list_free);
+}
+
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result)
+{
+ rcu_read_lock();
+ *result = idr_find(&fpriv->bo_list_handles, id);
+
+ if (*result && kref_get_unless_zero(&(*result)->refcount)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ rcu_read_unlock();
+ *result = NULL;
+ return -ENOENT;
+}
+
+void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
+{
+ kref_put(&list->refcount, amdgpu_bo_list_free);
+}
+
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param)
+{
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ struct drm_amdgpu_bo_list_entry *info;
+ int r;
+
+ info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ /* copy the handle array from userspace to a kernel buffer */
+ r = -EFAULT;
+ if (likely(info_size == in->bo_info_size)) {
+ unsigned long bytes = in->bo_number *
+ in->bo_info_size;
+
+ if (copy_from_user(info, uptr, bytes))
+ goto error_free;
+
+ } else {
+ unsigned long bytes = min(in->bo_info_size, info_size);
+ unsigned i;
+
+ memset(info, 0, in->bo_number * info_size);
+ for (i = 0; i < in->bo_number; ++i) {
+ if (copy_from_user(&info[i], uptr, bytes))
+ goto error_free;
+
+ uptr += in->bo_info_size;
+ }
+ }
+
+ *info_param = info;
+ return 0;
+
+error_free:
+ kvfree(info);
+ return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ union drm_amdgpu_bo_list *args = data;
+ uint32_t handle = args->in.list_handle;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+ struct amdgpu_bo_list *list, *old;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+ if (r)
+ return r;
+
+ switch (args->in.operation) {
+ case AMDGPU_BO_LIST_OP_CREATE:
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &list);
+ if (r)
+ goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (r < 0) {
+ goto error_put_list;
+ }
+
+ handle = r;
+ break;
+
+ case AMDGPU_BO_LIST_OP_DESTROY:
+ amdgpu_bo_list_destroy(fpriv, handle);
+ handle = 0;
+ break;
+
+ case AMDGPU_BO_LIST_OP_UPDATE:
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &list);
+ if (r)
+ goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ old = idr_replace(&fpriv->bo_list_handles, list, handle);
+ mutex_unlock(&fpriv->bo_list_lock);
+
+ if (IS_ERR(old)) {
+ r = PTR_ERR(old);
+ goto error_put_list;
+ }
+
+ amdgpu_bo_list_put(old);
+ break;
+
+ default:
+ r = -EINVAL;
+ goto error_free;
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.list_handle = handle;
+ kvfree(info);
+
+ return 0;
+
+error_put_list:
+ amdgpu_bo_list_put(list);
+
+error_free:
+ kvfree(info);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
new file mode 100644
index 000000000..26c01cb13
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_BO_LIST_H__
+#define __AMDGPU_BO_LIST_H__
+
+#include <drm/amdgpu_drm.h>
+
+struct hmm_range;
+
+struct drm_file;
+
+struct amdgpu_device;
+struct amdgpu_bo;
+struct amdgpu_bo_va;
+struct amdgpu_fpriv;
+
+struct amdgpu_bo_list_entry {
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_va *bo_va;
+ uint32_t priority;
+ struct page **user_pages;
+ struct hmm_range *range;
+ bool user_invalidated;
+};
+
+struct amdgpu_bo_list {
+ struct rcu_head rhead;
+ struct kref refcount;
+ struct amdgpu_bo *gds_obj;
+ struct amdgpu_bo *gws_obj;
+ struct amdgpu_bo *oa_obj;
+ unsigned first_userptr;
+ unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
+};
+
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result);
+void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ size_t num_entries,
+ struct amdgpu_bo_list **list);
+
+static inline struct amdgpu_bo_list_entry *
+amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
+{
+ struct amdgpu_bo_list_entry *array = (void *)&list[1];
+
+ return &array[index];
+}
+
+#define amdgpu_bo_list_for_each_entry(e, list) \
+ for (e = amdgpu_bo_list_array_entry(list, 0); \
+ e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ ++e)
+
+#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
+ for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
+ e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ ++e)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
new file mode 100644
index 000000000..b8280be62
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <linux/firmware.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_ucode.h"
+
+struct amdgpu_cgs_device {
+ struct cgs_device base;
+ struct amdgpu_device *adev;
+};
+
+#define CGS_FUNC_ADEV \
+ struct amdgpu_device *adev = \
+ ((struct amdgpu_cgs_device *)cgs_device)->adev
+
+
+static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset)
+{
+ CGS_FUNC_ADEV;
+ return RREG32(offset);
+}
+
+static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset,
+ uint32_t value)
+{
+ CGS_FUNC_ADEV;
+ WREG32(offset, value);
+}
+
+static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned int index)
+{
+ CGS_FUNC_ADEV;
+ switch (space) {
+ case CGS_IND_REG__PCIE:
+ return RREG32_PCIE(index);
+ case CGS_IND_REG__SMC:
+ return RREG32_SMC(index);
+ case CGS_IND_REG__UVD_CTX:
+ return RREG32_UVD_CTX(index);
+ case CGS_IND_REG__DIDT:
+ return RREG32_DIDT(index);
+ case CGS_IND_REG_GC_CAC:
+ return RREG32_GC_CAC(index);
+ case CGS_IND_REG_SE_CAC:
+ return RREG32_SE_CAC(index);
+ case CGS_IND_REG__AUDIO_ENDPT:
+ DRM_ERROR("audio endpt register access not implemented.\n");
+ return 0;
+ default:
+ BUG();
+ }
+ WARN(1, "Invalid indirect register space");
+ return 0;
+}
+
+static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned int index, uint32_t value)
+{
+ CGS_FUNC_ADEV;
+ switch (space) {
+ case CGS_IND_REG__PCIE:
+ return WREG32_PCIE(index, value);
+ case CGS_IND_REG__SMC:
+ return WREG32_SMC(index, value);
+ case CGS_IND_REG__UVD_CTX:
+ return WREG32_UVD_CTX(index, value);
+ case CGS_IND_REG__DIDT:
+ return WREG32_DIDT(index, value);
+ case CGS_IND_REG_GC_CAC:
+ return WREG32_GC_CAC(index, value);
+ case CGS_IND_REG_SE_CAC:
+ return WREG32_SE_CAC(index, value);
+ case CGS_IND_REG__AUDIO_ENDPT:
+ DRM_ERROR("audio endpt register access not implemented.\n");
+ return;
+ default:
+ BUG();
+ }
+ WARN(1, "Invalid indirect register space");
+}
+
+static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
+{
+ CGS_FUNC_ADEV;
+ enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM;
+
+ switch (fw_type) {
+ case CGS_UCODE_ID_SDMA0:
+ result = AMDGPU_UCODE_ID_SDMA0;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ result = AMDGPU_UCODE_ID_SDMA1;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ result = AMDGPU_UCODE_ID_CP_CE;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ result = AMDGPU_UCODE_ID_CP_PFP;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ result = AMDGPU_UCODE_ID_CP_ME;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ /* for VI. JT2 should be the same as JT1, because:
+ 1, MEC2 and MEC1 use exactly same FW.
+ 2, JT2 is not pached but JT1 is.
+ */
+ if (adev->asic_type >= CHIP_TOPAZ)
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ else
+ result = AMDGPU_UCODE_ID_CP_MEC2;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ result = AMDGPU_UCODE_ID_RLC_G;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ result = AMDGPU_UCODE_ID_STORAGE;
+ break;
+ default:
+ DRM_ERROR("Firmware type not supported\n");
+ }
+ return result;
+}
+
+static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type)
+{
+ CGS_FUNC_ADEV;
+ uint16_t fw_version = 0;
+
+ switch (type) {
+ case CGS_UCODE_ID_SDMA0:
+ fw_version = adev->sdma.instance[0].fw_version;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ fw_version = adev->sdma.instance[1].fw_version;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ fw_version = adev->gfx.ce_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ fw_version = adev->gfx.pfp_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ fw_version = adev->gfx.me_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ fw_version = adev->gfx.rlc_fw_version;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ break;
+ default:
+ DRM_ERROR("firmware type %d do not have version\n", type);
+ break;
+ }
+ return fw_version;
+}
+
+static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type,
+ struct cgs_firmware_info *info)
+{
+ CGS_FUNC_ADEV;
+
+ if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) {
+ uint64_t gpu_addr;
+ uint32_t data_size;
+ const struct gfx_firmware_header_v1_0 *header;
+ enum AMDGPU_UCODE_ID id;
+ struct amdgpu_firmware_info *ucode;
+
+ id = fw_type_convert(cgs_device, type);
+ ucode = &adev->firmware.ucode[id];
+ if (ucode->fw == NULL)
+ return -EINVAL;
+
+ gpu_addr = ucode->mc_addr;
+ header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ data_size = le32_to_cpu(header->header.ucode_size_bytes);
+
+ if ((type == CGS_UCODE_ID_CP_MEC_JT1) ||
+ (type == CGS_UCODE_ID_CP_MEC_JT2)) {
+ gpu_addr += ALIGN(le32_to_cpu(header->header.ucode_size_bytes), PAGE_SIZE);
+ data_size = le32_to_cpu(header->jt_size) << 2;
+ }
+
+ info->kptr = ucode->kaddr;
+ info->image_size = data_size;
+ info->mc_addr = gpu_addr;
+ info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
+
+ if (type == CGS_UCODE_ID_CP_MEC)
+ info->image_size = le32_to_cpu(header->jt_offset) << 2;
+
+ info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
+ info->feature_version = (uint16_t)le32_to_cpu(header->ucode_feature_version);
+ } else {
+ char fw_name[30] = {0};
+ int err = 0;
+ uint32_t ucode_size;
+ uint32_t ucode_start_address;
+ const uint8_t *src;
+ const struct smc_firmware_header_v1_0 *hdr;
+ const struct common_firmware_header *header;
+ struct amdgpu_firmware_info *ucode = NULL;
+
+ if (!adev->pm.fw) {
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ strcpy(fw_name, "radeon/tahiti_smc.bin");
+ break;
+ case CHIP_PITCAIRN:
+ if ((adev->pdev->revision == 0x81) &&
+ ((adev->pdev->device == 0x6810) ||
+ (adev->pdev->device == 0x6811))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
+ } else {
+ strcpy(fw_name, "radeon/pitcairn_smc.bin");
+ }
+ break;
+ case CHIP_VERDE:
+ if (((adev->pdev->device == 0x6820) &&
+ ((adev->pdev->revision == 0x81) ||
+ (adev->pdev->revision == 0x83))) ||
+ ((adev->pdev->device == 0x6821) &&
+ ((adev->pdev->revision == 0x83) ||
+ (adev->pdev->revision == 0x87))) ||
+ ((adev->pdev->revision == 0x87) &&
+ ((adev->pdev->device == 0x6823) ||
+ (adev->pdev->device == 0x682b)))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "radeon/verde_k_smc.bin");
+ } else {
+ strcpy(fw_name, "radeon/verde_smc.bin");
+ }
+ break;
+ case CHIP_OLAND:
+ if (((adev->pdev->revision == 0x81) &&
+ ((adev->pdev->device == 0x6600) ||
+ (adev->pdev->device == 0x6604) ||
+ (adev->pdev->device == 0x6605) ||
+ (adev->pdev->device == 0x6610))) ||
+ ((adev->pdev->revision == 0x83) &&
+ (adev->pdev->device == 0x6610))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "radeon/oland_k_smc.bin");
+ } else {
+ strcpy(fw_name, "radeon/oland_smc.bin");
+ }
+ break;
+ case CHIP_HAINAN:
+ if (((adev->pdev->revision == 0x81) &&
+ (adev->pdev->device == 0x6660)) ||
+ ((adev->pdev->revision == 0x83) &&
+ ((adev->pdev->device == 0x6660) ||
+ (adev->pdev->device == 0x6663) ||
+ (adev->pdev->device == 0x6665) ||
+ (adev->pdev->device == 0x6667)))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "radeon/hainan_k_smc.bin");
+ } else if ((adev->pdev->revision == 0xc3) &&
+ (adev->pdev->device == 0x6665)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "radeon/banks_k_2_smc.bin");
+ } else {
+ strcpy(fw_name, "radeon/hainan_smc.bin");
+ }
+ break;
+ case CHIP_BONAIRE:
+ if ((adev->pdev->revision == 0x80) ||
+ (adev->pdev->revision == 0x81) ||
+ (adev->pdev->device == 0x665f)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ } else {
+ strcpy(fw_name, "amdgpu/bonaire_smc.bin");
+ }
+ break;
+ case CHIP_HAWAII:
+ if (adev->pdev->revision == 0x80) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ } else {
+ strcpy(fw_name, "amdgpu/hawaii_smc.bin");
+ }
+ break;
+ case CHIP_TOPAZ:
+ if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
+ ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ } else
+ strcpy(fw_name, "amdgpu/topaz_smc.bin");
+ break;
+ case CHIP_TONGA:
+ if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
+ ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ } else
+ strcpy(fw_name, "amdgpu/tonga_smc.bin");
+ break;
+ case CHIP_FIJI:
+ strcpy(fw_name, "amdgpu/fiji_smc.bin");
+ break;
+ case CHIP_POLARIS11:
+ if (type == CGS_UCODE_ID_SMU) {
+ if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ } else {
+ strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+ }
+ } else if (type == CGS_UCODE_ID_SMU_SK) {
+ strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ }
+ break;
+ case CHIP_POLARIS10:
+ if (type == CGS_UCODE_ID_SMU) {
+ if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ } else {
+ strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+ }
+ } else if (type == CGS_UCODE_ID_SMU_SK) {
+ strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ }
+ break;
+ case CHIP_POLARIS12:
+ if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ } else {
+ strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+ }
+ break;
+ case CHIP_VEGAM:
+ strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ break;
+ case CHIP_VEGA10:
+ if ((adev->pdev->device == 0x687f) &&
+ ((adev->pdev->revision == 0xc0) ||
+ (adev->pdev->revision == 0xc1) ||
+ (adev->pdev->revision == 0xc3)))
+ strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ else
+ strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ break;
+ case CHIP_VEGA12:
+ strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ break;
+ case CHIP_VEGA20:
+ strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ break;
+ default:
+ DRM_ERROR("SMC firmware not supported\n");
+ return -EINVAL;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
+ if (err) {
+ DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
+ amdgpu_ucode_release(&adev->pm.fw);
+ return err;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ ucode->ucode_id = AMDGPU_UCODE_ID_SMC;
+ ucode->fw = adev->pm.fw;
+ header = (const struct common_firmware_header *)ucode->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(&hdr->header);
+ adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
+ ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
+ src = (const uint8_t *)(adev->pm.fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ info->version = adev->pm.fw_version;
+ info->image_size = ucode_size;
+ info->ucode_start_address = ucode_start_address;
+ info->kptr = (void *)src;
+ }
+ return 0;
+}
+
+static const struct cgs_ops amdgpu_cgs_ops = {
+ .read_register = amdgpu_cgs_read_register,
+ .write_register = amdgpu_cgs_write_register,
+ .read_ind_register = amdgpu_cgs_read_ind_register,
+ .write_ind_register = amdgpu_cgs_write_ind_register,
+ .get_firmware_info = amdgpu_cgs_get_firmware_info,
+};
+
+struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
+{
+ struct amdgpu_cgs_device *cgs_device =
+ kmalloc(sizeof(*cgs_device), GFP_KERNEL);
+
+ if (!cgs_device) {
+ DRM_ERROR("Couldn't allocate CGS device structure\n");
+ return NULL;
+ }
+
+ cgs_device->base.ops = &amdgpu_cgs_ops;
+ cgs_device->adev = adev;
+
+ return (struct cgs_device *)cgs_device;
+}
+
+void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device)
+{
+ kfree(cgs_device);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
new file mode 100644
index 000000000..d34037b85
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -0,0 +1,2033 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
+
+#include <linux/pm_runtime.h>
+
+void amdgpu_connector_hotplug(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* bail if the connector does not have hpd pin, e.g.,
+ * VGA, TV, etc.
+ */
+ if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE)
+ return;
+
+ amdgpu_display_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+
+ /* if the connector is already off, don't turn it back on */
+ if (connector->dpms != DRM_MODE_DPMS_ON)
+ return;
+
+ /* just deal with DP (not eDP) here. */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ /* if existing sink type was not DP no need to retrain */
+ if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT)
+ return;
+
+ /* first get sink type as it may be reset after (un)plug */
+ dig_connector->dp_sink_type = amdgpu_atombios_dp_get_sinktype(amdgpu_connector);
+ /* don't do anything if sink is not display port, i.e.,
+ * passive dp->(dvi|hdmi) adaptor
+ */
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+ amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+ }
+ }
+}
+
+static void amdgpu_connector_property_change_mode(struct drm_encoder *encoder)
+{
+ struct drm_crtc *crtc = encoder->crtc;
+
+ if (crtc && crtc->enabled) {
+ drm_crtc_helper_set_mode(crtc, &crtc->mode,
+ crtc->x, crtc->y, crtc->primary->fb);
+ }
+}
+
+int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ int bpc = 8;
+ unsigned mode_clock, max_tmds_clock;
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ if (amdgpu_connector->use_digital) {
+ if (connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ if (connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) ||
+ connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_eDP:
+ case DRM_MODE_CONNECTOR_LVDS:
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ else {
+ const struct drm_connector_helper_funcs *connector_funcs =
+ connector->helper_private;
+ struct drm_encoder *encoder = connector_funcs->best_encoder(connector);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (dig->lcd_misc & ATOM_PANEL_MISC_V13_6BIT_PER_COLOR)
+ bpc = 6;
+ else if (dig->lcd_misc & ATOM_PANEL_MISC_V13_8BIT_PER_COLOR)
+ bpc = 8;
+ }
+ break;
+ }
+
+ if (connector->display_info.is_hdmi) {
+ /*
+ * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make
+ * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at
+ * 12 bpc is always supported on hdmi deep color sinks, as this is
+ * required by the HDMI-1.3 spec. Clamp to a safe 12 bpc maximum.
+ */
+ if (bpc > 12) {
+ DRM_DEBUG("%s: HDMI deep color %d bpc unsupported. Using 12 bpc.\n",
+ connector->name, bpc);
+ bpc = 12;
+ }
+
+ /* Any defined maximum tmds clock limit we must not exceed? */
+ if (connector->display_info.max_tmds_clock > 0) {
+ /* mode_clock is clock in kHz for mode to be modeset on this connector */
+ mode_clock = amdgpu_connector->pixelclock_for_modeset;
+
+ /* Maximum allowable input clock in kHz */
+ max_tmds_clock = connector->display_info.max_tmds_clock;
+
+ DRM_DEBUG("%s: hdmi mode dotclock %d kHz, max tmds input clock %d kHz.\n",
+ connector->name, mode_clock, max_tmds_clock);
+
+ /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ (mode_clock * 5/4 <= max_tmds_clock))
+ bpc = 10;
+ else
+ bpc = 8;
+
+ DRM_DEBUG("%s: HDMI deep color 12 bpc exceeds max tmds clock. Using %d bpc.\n",
+ connector->name, bpc);
+ }
+
+ if ((bpc == 10) && (mode_clock * 5/4 > max_tmds_clock)) {
+ bpc = 8;
+ DRM_DEBUG("%s: HDMI deep color 10 bpc exceeds max tmds clock. Using %d bpc.\n",
+ connector->name, bpc);
+ }
+ } else if (bpc > 8) {
+ /* max_tmds_clock missing, but hdmi spec mandates it for deep color. */
+ DRM_DEBUG("%s: Required max tmds clock for HDMI deep color missing. Using 8 bpc.\n",
+ connector->name);
+ bpc = 8;
+ }
+ }
+
+ if ((amdgpu_deep_color == 0) && (bpc > 8)) {
+ DRM_DEBUG("%s: Deep color disabled. Set amdgpu module param deep_color=1 to enable.\n",
+ connector->name);
+ bpc = 8;
+ }
+
+ DRM_DEBUG("%s: Display bpc=%d, returned bpc=%d\n",
+ connector->name, connector->display_info.bpc, bpc);
+
+ return bpc;
+}
+
+static void
+amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
+ enum drm_connector_status status)
+{
+ struct drm_encoder *best_encoder;
+ struct drm_encoder *encoder;
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+ bool connected;
+
+ best_encoder = connector_funcs->best_encoder(connector);
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if ((encoder == best_encoder) && (status == connector_status_connected))
+ connected = true;
+ else
+ connected = false;
+
+ amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected);
+ }
+}
+
+static struct drm_encoder *
+amdgpu_connector_find_encoder(struct drm_connector *connector,
+ int encoder_type)
+{
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (encoder->encoder_type == encoder_type)
+ return encoder;
+ }
+
+ return NULL;
+}
+
+struct edid *amdgpu_connector_edid(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_property_blob *edid_blob = connector->edid_blob_ptr;
+
+ if (amdgpu_connector->edid) {
+ return amdgpu_connector->edid;
+ } else if (edid_blob) {
+ struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
+ if (edid)
+ amdgpu_connector->edid = edid;
+ }
+ return amdgpu_connector->edid;
+}
+
+static struct edid *
+amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
+{
+ struct edid *edid;
+
+ if (adev->mode_info.bios_hardcoded_edid) {
+ edid = kmalloc(adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
+ if (edid) {
+ memcpy((unsigned char *)edid,
+ (unsigned char *)adev->mode_info.bios_hardcoded_edid,
+ adev->mode_info.bios_hardcoded_edid_size);
+ return edid;
+ }
+ }
+ return NULL;
+}
+
+static void amdgpu_connector_get_edid(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->edid)
+ return;
+
+ /* on hw with routers, select right port */
+ if (amdgpu_connector->router.ddc_valid)
+ amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
+
+ if ((amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) &&
+ amdgpu_connector->ddc_bus->has_aux) {
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
+ } else if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+ struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
+
+ if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
+ dig->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) &&
+ amdgpu_connector->ddc_bus->has_aux)
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
+ else if (amdgpu_connector->ddc_bus)
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->adapter);
+ } else if (amdgpu_connector->ddc_bus) {
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->adapter);
+ }
+
+ if (!amdgpu_connector->edid) {
+ /* some laptops provide a hardcoded edid in rom for LCDs */
+ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
+ amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ }
+ }
+}
+
+static void amdgpu_connector_free_edid(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ kfree(amdgpu_connector->edid);
+ amdgpu_connector->edid = NULL;
+}
+
+static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int ret;
+
+ if (amdgpu_connector->edid) {
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
+ return ret;
+ }
+ drm_connector_update_edid_property(connector, NULL);
+ return 0;
+}
+
+static struct drm_encoder *
+amdgpu_connector_best_single_encoder(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder)
+ return encoder;
+
+ return NULL;
+}
+
+static void amdgpu_get_native_mode(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ if (encoder == NULL)
+ return;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (!list_empty(&connector->probed_modes)) {
+ struct drm_display_mode *preferred_mode =
+ list_first_entry(&connector->probed_modes,
+ struct drm_display_mode, head);
+
+ amdgpu_encoder->native_mode = *preferred_mode;
+ } else {
+ amdgpu_encoder->native_mode.clock = 0;
+ }
+}
+
+static struct drm_display_mode *
+amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *mode = NULL;
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ if (native_mode->hdisplay != 0 &&
+ native_mode->vdisplay != 0 &&
+ native_mode->clock != 0) {
+ mode = drm_mode_duplicate(dev, native_mode);
+ if (!mode)
+ return NULL;
+
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ drm_mode_set_name(mode);
+
+ DRM_DEBUG_KMS("Adding native panel mode %s\n", mode->name);
+ } else if (native_mode->hdisplay != 0 &&
+ native_mode->vdisplay != 0) {
+ /* mac laptops without an edid */
+ /* Note that this is not necessarily the exact panel mode,
+ * but an approximation based on the cvt formula. For these
+ * systems we should ideally read the mode info out of the
+ * registers or add a mode table, but this works and is much
+ * simpler.
+ */
+ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+ if (!mode)
+ return NULL;
+
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
+ }
+ return mode;
+}
+
+static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *mode = NULL;
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ int i;
+ static const struct mode_size {
+ int w;
+ int h;
+ } common_modes[17] = {
+ { 640, 480},
+ { 720, 480},
+ { 800, 600},
+ { 848, 480},
+ {1024, 768},
+ {1152, 768},
+ {1280, 720},
+ {1280, 800},
+ {1280, 854},
+ {1280, 960},
+ {1280, 1024},
+ {1440, 900},
+ {1400, 1050},
+ {1680, 1050},
+ {1600, 1200},
+ {1920, 1080},
+ {1920, 1200}
+ };
+
+ for (i = 0; i < 17; i++) {
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
+ if (common_modes[i].w > 1024 ||
+ common_modes[i].h > 768)
+ continue;
+ }
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ if (common_modes[i].w > native_mode->hdisplay ||
+ common_modes[i].h > native_mode->vdisplay ||
+ (common_modes[i].w == native_mode->hdisplay &&
+ common_modes[i].h == native_mode->vdisplay))
+ continue;
+ }
+ if (common_modes[i].w < 320 || common_modes[i].h < 200)
+ continue;
+
+ mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ drm_mode_probed_add(connector, mode);
+ }
+}
+
+static int amdgpu_connector_set_property(struct drm_connector *connector,
+ struct drm_property *property,
+ uint64_t val)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ if (property == adev->mode_info.coherent_mode_property) {
+ struct amdgpu_encoder_atom_dig *dig;
+ bool new_coherent_mode;
+
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (!amdgpu_encoder->enc_priv)
+ return 0;
+
+ dig = amdgpu_encoder->enc_priv;
+ new_coherent_mode = val ? true : false;
+ if (dig->coherent_mode != new_coherent_mode) {
+ dig->coherent_mode = new_coherent_mode;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.audio_property) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_connector->audio != val) {
+ amdgpu_connector->audio = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.dither_property) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_connector->dither != val) {
+ amdgpu_connector->dither = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_type != val) {
+ amdgpu_encoder->underscan_type = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_hborder_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_hborder != val) {
+ amdgpu_encoder->underscan_hborder = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_vborder_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_vborder != val) {
+ amdgpu_encoder->underscan_vborder = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.load_detect_property) {
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+
+ if (val == 0)
+ amdgpu_connector->dac_load_detect = false;
+ else
+ amdgpu_connector->dac_load_detect = true;
+ }
+
+ if (property == dev->mode_config.scaling_mode_property) {
+ enum amdgpu_rmx_type rmx_type;
+
+ if (connector->encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
+ } else {
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+ amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
+ }
+
+ switch (val) {
+ default:
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
+ }
+
+ if (amdgpu_encoder->rmx_type == rmx_type)
+ return 0;
+
+ if ((rmx_type != DRM_MODE_SCALE_NONE) &&
+ (amdgpu_encoder->native_mode.clock == 0))
+ return 0;
+
+ amdgpu_encoder->rmx_type = rmx_type;
+
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ struct drm_display_mode *t, *mode;
+
+ /* If the EDID preferred mode doesn't match the native mode, use it */
+ list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+ if (mode->type & DRM_MODE_TYPE_PREFERRED) {
+ if (mode->hdisplay != native_mode->hdisplay ||
+ mode->vdisplay != native_mode->vdisplay)
+ drm_mode_copy(native_mode, mode);
+ }
+ }
+
+ /* Try to get native mode details from EDID if necessary */
+ if (!native_mode->clock) {
+ list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+ if (mode->hdisplay == native_mode->hdisplay &&
+ mode->vdisplay == native_mode->vdisplay) {
+ drm_mode_copy(native_mode, mode);
+ drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V);
+ DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n");
+ break;
+ }
+ }
+ }
+
+ if (!native_mode->clock) {
+ DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n");
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ }
+}
+
+static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ int ret = 0;
+ struct drm_display_mode *mode;
+
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ if (ret > 0) {
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (encoder) {
+ amdgpu_connector_fixup_lcd_native_mode(encoder, connector);
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ return ret;
+ }
+
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (!encoder)
+ return 0;
+
+ /* we have no EDID modes */
+ mode = amdgpu_connector_lcd_native_mode(encoder);
+ if (mode) {
+ ret = 1;
+ drm_mode_probed_add(connector, mode);
+ /* add the width/height from vbios tables if available */
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+
+ if ((mode->hdisplay < 320) || (mode->vdisplay < 240))
+ return MODE_PANEL;
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* AVIVO hardware supports downscaling modes larger than the panel
+ * to the panel size, but I'm not sure this is desirable.
+ */
+ if ((mode->hdisplay > native_mode->hdisplay) ||
+ (mode->vdisplay > native_mode->vdisplay))
+ return MODE_PANEL;
+
+ /* if scaling is disabled, block non-native modes */
+ if (amdgpu_encoder->rmx_type == RMX_OFF) {
+ if ((mode->hdisplay != native_mode->hdisplay) ||
+ (mode->vdisplay != native_mode->vdisplay))
+ return MODE_PANEL;
+ }
+ }
+
+ return MODE_OK;
+}
+
+static enum drm_connector_status
+amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ enum drm_connector_status ret = connector_status_disconnected;
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* check if panel is valid */
+ if (native_mode->hdisplay >= 320 && native_mode->vdisplay >= 240)
+ ret = connector_status_connected;
+
+ }
+
+ /* check for edid as well */
+ amdgpu_connector_get_edid(connector);
+ if (amdgpu_connector->edid)
+ ret = connector_status_connected;
+ /* check acpi lid status ??? */
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
+ return ret;
+}
+
+static void amdgpu_connector_unregister(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->ddc_bus && amdgpu_connector->ddc_bus->has_aux) {
+ drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux);
+ amdgpu_connector->ddc_bus->has_aux = false;
+ }
+}
+
+static void amdgpu_connector_destroy(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ amdgpu_connector_free_edid(connector);
+ kfree(amdgpu_connector->con_priv);
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+ kfree(connector);
+}
+
+static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
+ struct drm_property *property,
+ uint64_t value)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_encoder *amdgpu_encoder;
+ enum amdgpu_rmx_type rmx_type;
+
+ DRM_DEBUG_KMS("\n");
+ if (property != dev->mode_config.scaling_mode_property)
+ return 0;
+
+ if (connector->encoder)
+ amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
+ else {
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+ amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
+ }
+
+ switch (value) {
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ default:
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
+ }
+
+ if (amdgpu_encoder->rmx_type == rmx_type)
+ return 0;
+
+ amdgpu_encoder->rmx_type = rmx_type;
+
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ return 0;
+}
+
+
+static const struct drm_connector_helper_funcs amdgpu_connector_lvds_helper_funcs = {
+ .get_modes = amdgpu_connector_lvds_get_modes,
+ .mode_valid = amdgpu_connector_lvds_mode_valid,
+ .best_encoder = amdgpu_connector_best_single_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_lvds_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_lvds_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .set_property = amdgpu_connector_set_lcd_property,
+};
+
+static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
+{
+ int ret;
+
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ amdgpu_get_native_mode(connector);
+
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* XXX check mode bandwidth */
+
+ if ((mode->clock / 10) > adev->clock.max_pixel_clock)
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+static enum drm_connector_status
+amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder;
+ const struct drm_encoder_helper_funcs *encoder_funcs;
+ bool dret = false;
+ enum drm_connector_status ret = connector_status_disconnected;
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (!encoder)
+ ret = connector_status_disconnected;
+
+ if (amdgpu_connector->ddc_bus)
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+ if (dret) {
+ amdgpu_connector->detected_by_load = false;
+ amdgpu_connector_free_edid(connector);
+ amdgpu_connector_get_edid(connector);
+
+ if (!amdgpu_connector->edid) {
+ DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
+ connector->name);
+ ret = connector_status_connected;
+ } else {
+ amdgpu_connector->use_digital =
+ !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+
+ /* some oems have boards with separate digital and analog connectors
+ * with a shared ddc line (often vga + hdmi)
+ */
+ if (amdgpu_connector->use_digital && amdgpu_connector->shared_ddc) {
+ amdgpu_connector_free_edid(connector);
+ ret = connector_status_disconnected;
+ } else {
+ ret = connector_status_connected;
+ }
+ }
+ } else {
+
+ /* if we aren't forcing don't do destructive polling */
+ if (!force) {
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (amdgpu_connector->detected_by_load)
+ ret = connector->status;
+ goto out;
+ }
+
+ if (amdgpu_connector->dac_load_detect && encoder) {
+ encoder_funcs = encoder->helper_private;
+ ret = encoder_funcs->detect(encoder, connector);
+ if (ret != connector_status_disconnected)
+ amdgpu_connector->detected_by_load = true;
+ }
+ }
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+out:
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
+ return ret;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_vga_helper_funcs = {
+ .get_modes = amdgpu_connector_vga_get_modes,
+ .mode_valid = amdgpu_connector_vga_mode_valid,
+ .best_encoder = amdgpu_connector_best_single_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_vga_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_vga_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .set_property = amdgpu_connector_set_property,
+};
+
+static bool
+amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ enum drm_connector_status status;
+
+ if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE) {
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd))
+ status = connector_status_connected;
+ else
+ status = connector_status_disconnected;
+ if (connector->status == status)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * DVI is complicated
+ * Do a DDC probe, if DDC probe passes, get the full EDID so
+ * we can do analog/digital monitor detection at this point.
+ * If the monitor is an analog monitor or we got no DDC,
+ * we need to find the DAC encoder object for this connector.
+ * If we got no DDC, we do load detection on the DAC encoder object.
+ * If we got analog DDC or load detection passes on the DAC encoder
+ * we have to check if this analog encoder is shared with anyone else (TV)
+ * if its shared we have to set the other connector to disconnected.
+ */
+static enum drm_connector_status
+amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const struct drm_encoder_helper_funcs *encoder_funcs;
+ int r;
+ enum drm_connector_status ret = connector_status_disconnected;
+ bool dret = false, broken_edid = false;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (amdgpu_connector->detected_hpd_without_ddc) {
+ force = true;
+ amdgpu_connector->detected_hpd_without_ddc = false;
+ }
+
+ if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
+ ret = connector->status;
+ goto exit;
+ }
+
+ if (amdgpu_connector->ddc_bus) {
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+
+ /* Sometimes the pins required for the DDC probe on DVI
+ * connectors don't make contact at the same time that the ones
+ * for HPD do. If the DDC probe fails even though we had an HPD
+ * signal, try again later
+ */
+ if (!dret && !force &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+ amdgpu_connector->detected_hpd_without_ddc = true;
+ schedule_delayed_work(&adev->hotplug_work,
+ msecs_to_jiffies(1000));
+ goto exit;
+ }
+ }
+ if (dret) {
+ amdgpu_connector->detected_by_load = false;
+ amdgpu_connector_free_edid(connector);
+ amdgpu_connector_get_edid(connector);
+
+ if (!amdgpu_connector->edid) {
+ DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
+ connector->name);
+ ret = connector_status_connected;
+ broken_edid = true; /* defer use_digital to later */
+ } else {
+ amdgpu_connector->use_digital =
+ !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+
+ /* some oems have boards with separate digital and analog connectors
+ * with a shared ddc line (often vga + hdmi)
+ */
+ if ((!amdgpu_connector->use_digital) && amdgpu_connector->shared_ddc) {
+ amdgpu_connector_free_edid(connector);
+ ret = connector_status_disconnected;
+ } else {
+ ret = connector_status_connected;
+ }
+
+ /* This gets complicated. We have boards with VGA + HDMI with a
+ * shared DDC line and we have boards with DVI-D + HDMI with a shared
+ * DDC line. The latter is more complex because with DVI<->HDMI adapters
+ * you don't really know what's connected to which port as both are digital.
+ */
+ if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *list_amdgpu_connector;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
+ if (connector == list_connector)
+ continue;
+ list_amdgpu_connector = to_amdgpu_connector(list_connector);
+ if (list_amdgpu_connector->shared_ddc &&
+ (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+ amdgpu_connector->ddc_bus->rec.i2c_id)) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ amdgpu_connector_free_edid(connector);
+ ret = connector_status_disconnected;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+ }
+ }
+
+ if ((ret == connector_status_connected) && (amdgpu_connector->use_digital == true))
+ goto out;
+
+ /* DVI-D and HDMI-A are digital only */
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+ goto out;
+
+ /* if we aren't forcing don't do destructive polling */
+ if (!force) {
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (amdgpu_connector->detected_by_load)
+ ret = connector->status;
+ goto out;
+ }
+
+ /* find analog encoder */
+ if (amdgpu_connector->dac_load_detect) {
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
+ encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
+ continue;
+
+ encoder_funcs = encoder->helper_private;
+ if (encoder_funcs->detect) {
+ if (!broken_edid) {
+ if (ret != connector_status_connected) {
+ /* deal with analog monitors without DDC */
+ ret = encoder_funcs->detect(encoder, connector);
+ if (ret == connector_status_connected) {
+ amdgpu_connector->use_digital = false;
+ }
+ if (ret != connector_status_disconnected)
+ amdgpu_connector->detected_by_load = true;
+ }
+ } else {
+ enum drm_connector_status lret;
+ /* assume digital unless load detected otherwise */
+ amdgpu_connector->use_digital = true;
+ lret = encoder_funcs->detect(encoder, connector);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",
+ encoder->encoder_type, lret);
+ if (lret == connector_status_connected)
+ amdgpu_connector->use_digital = false;
+ }
+ break;
+ }
+ }
+ }
+
+out:
+ /* updated in get modes as well since we need to know if it's analog or digital */
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+exit:
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
+ return ret;
+}
+
+/* okay need to be smart in here about which encoder to pick */
+static struct drm_encoder *
+amdgpu_connector_dvi_encoder(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (amdgpu_connector->use_digital == true) {
+ if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
+ return encoder;
+ } else {
+ if (encoder->encoder_type == DRM_MODE_ENCODER_DAC ||
+ encoder->encoder_type == DRM_MODE_ENCODER_TVDAC)
+ return encoder;
+ }
+ }
+
+ /* see if we have a default encoder TODO */
+
+ /* then check use digitial */
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder)
+ return encoder;
+
+ return NULL;
+}
+
+static void amdgpu_connector_dvi_force(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ if (connector->force == DRM_FORCE_ON)
+ amdgpu_connector->use_digital = false;
+ if (connector->force == DRM_FORCE_ON_DIGITAL)
+ amdgpu_connector->use_digital = true;
+}
+
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* XXX check mode bandwidth */
+
+ if (amdgpu_connector->use_digital && (mode->clock > 165000)) {
+ if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) ||
+ (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) ||
+ (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) {
+ return MODE_OK;
+ } else if (connector->display_info.is_hdmi) {
+ /* HDMI 1.3+ supports max clock of 340 Mhz */
+ if (mode->clock > 340000)
+ return MODE_CLOCK_HIGH;
+ else
+ return MODE_OK;
+ } else {
+ return MODE_CLOCK_HIGH;
+ }
+ }
+
+ /* check against the max pixel clock */
+ if ((mode->clock / 10) > adev->clock.max_pixel_clock)
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_dvi_helper_funcs = {
+ .get_modes = amdgpu_connector_vga_get_modes,
+ .mode_valid = amdgpu_connector_dvi_mode_valid,
+ .best_encoder = amdgpu_connector_dvi_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_dvi_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dvi_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+};
+
+static int amdgpu_connector_dp_get_modes(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ int ret;
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct drm_display_mode *mode;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ } else {
+ /* need to setup ddc on the bridge */
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ if (encoder)
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ }
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ }
+
+ if (ret > 0) {
+ if (encoder) {
+ amdgpu_connector_fixup_lcd_native_mode(encoder, connector);
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ return ret;
+ }
+
+ if (!encoder)
+ return 0;
+
+ /* we have no EDID modes */
+ mode = amdgpu_connector_lcd_native_mode(encoder);
+ if (mode) {
+ ret = 1;
+ drm_mode_probed_add(connector, mode);
+ /* add the width/height from vbios tables if available */
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ } else {
+ /* need to setup ddc on the bridge */
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ if (encoder)
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ }
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+
+ amdgpu_get_native_mode(connector);
+ }
+
+ return ret;
+}
+
+u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ return amdgpu_encoder->encoder_id;
+ default:
+ break;
+ }
+ }
+
+ return ENCODER_OBJECT_ID_NONE;
+}
+
+static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ bool found = false;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
+ found = true;
+ }
+
+ return found;
+}
+
+bool amdgpu_connector_is_dp12_capable(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if ((adev->clock.default_dispclk >= 53900) &&
+ amdgpu_connector_encoder_is_hbr2(connector)) {
+ return true;
+ }
+
+ return false;
+}
+
+static enum drm_connector_status
+amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ enum drm_connector_status ret = connector_status_disconnected;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
+ ret = connector->status;
+ goto out;
+ }
+
+ amdgpu_connector_free_edid(connector);
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* check if panel is valid */
+ if (native_mode->hdisplay >= 320 && native_mode->vdisplay >= 240)
+ ret = connector_status_connected;
+ }
+ /* eDP is always DP */
+ amdgpu_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ ret = connector_status_connected;
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ } else if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ /* DP bridges are always DP */
+ amdgpu_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+ /* get the DPCD from the bridge */
+ amdgpu_atombios_dp_get_dpcd(amdgpu_connector);
+
+ if (encoder) {
+ /* setup ddc on the bridge */
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ /* bridge chips are always aux */
+ /* try DDC */
+ if (amdgpu_display_ddc_probe(amdgpu_connector, true))
+ ret = connector_status_connected;
+ else if (amdgpu_connector->dac_load_detect) { /* try load detection */
+ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+ ret = encoder_funcs->detect(encoder, connector);
+ }
+ }
+ } else {
+ amdgpu_dig_connector->dp_sink_type =
+ amdgpu_atombios_dp_get_sinktype(amdgpu_connector);
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ ret = connector_status_connected;
+ if (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
+ amdgpu_atombios_dp_get_dpcd(amdgpu_connector);
+ } else {
+ if (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ ret = connector_status_connected;
+ } else {
+ /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
+ if (amdgpu_display_ddc_probe(amdgpu_connector,
+ false))
+ ret = connector_status_connected;
+ }
+ }
+ }
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+out:
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ drm_dp_set_subconnector_property(&amdgpu_connector->base,
+ ret,
+ amdgpu_dig_connector->dpcd,
+ amdgpu_dig_connector->downstream_ports);
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+
+ /* XXX check mode bandwidth */
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+
+ if ((mode->hdisplay < 320) || (mode->vdisplay < 240))
+ return MODE_PANEL;
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* AVIVO hardware supports downscaling modes larger than the panel
+ * to the panel size, but I'm not sure this is desirable.
+ */
+ if ((mode->hdisplay > native_mode->hdisplay) ||
+ (mode->vdisplay > native_mode->vdisplay))
+ return MODE_PANEL;
+
+ /* if scaling is disabled, block non-native modes */
+ if (amdgpu_encoder->rmx_type == RMX_OFF) {
+ if ((mode->hdisplay != native_mode->hdisplay) ||
+ (mode->vdisplay != native_mode->vdisplay))
+ return MODE_PANEL;
+ }
+ }
+ return MODE_OK;
+ } else {
+ if ((amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ return amdgpu_atombios_dp_mode_valid_helper(connector, mode);
+ } else {
+ if (connector->display_info.is_hdmi) {
+ /* HDMI 1.3+ supports max clock of 340 Mhz */
+ if (mode->clock > 340000)
+ return MODE_CLOCK_HIGH;
+ } else {
+ if (mode->clock > 165000)
+ return MODE_CLOCK_HIGH;
+ }
+ }
+ }
+
+ return MODE_OK;
+}
+
+static int
+amdgpu_connector_late_register(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int r = 0;
+
+ if (amdgpu_connector->ddc_bus->has_aux) {
+ amdgpu_connector->ddc_bus->aux.dev = amdgpu_connector->base.kdev;
+ r = drm_dp_aux_register(&amdgpu_connector->ddc_bus->aux);
+ }
+
+ return r;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_dp_helper_funcs = {
+ .get_modes = amdgpu_connector_dp_get_modes,
+ .mode_valid = amdgpu_connector_dp_mode_valid,
+ .best_encoder = amdgpu_connector_dvi_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_dp_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dp_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+ .late_register = amdgpu_connector_late_register,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_edp_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dp_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_lcd_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+ .late_register = amdgpu_connector_late_register,
+};
+
+void
+amdgpu_connector_add(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct i2c_adapter *ddc = NULL;
+ uint32_t subpixel_order = SubPixelNone;
+ bool shared_ddc = false;
+ bool is_dp_bridge = false;
+ bool has_aux = false;
+
+ if (connector_type == DRM_MODE_CONNECTOR_Unknown)
+ return;
+
+ /* see if we already added it */
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_connector->connector_id == connector_id) {
+ amdgpu_connector->devices |= supported_device;
+ drm_connector_list_iter_end(&iter);
+ return;
+ }
+ if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
+ if (amdgpu_connector->ddc_bus->rec.i2c_id == i2c_bus->i2c_id) {
+ amdgpu_connector->shared_ddc = true;
+ shared_ddc = true;
+ }
+ if (amdgpu_connector->router_bus && router->ddc_valid &&
+ (amdgpu_connector->router.router_id == router->router_id)) {
+ amdgpu_connector->shared_ddc = false;
+ shared_ddc = false;
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ /* check if it's a dp bridge */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & supported_device) {
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ is_dp_bridge = true;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ amdgpu_connector = kzalloc(sizeof(struct amdgpu_connector), GFP_KERNEL);
+ if (!amdgpu_connector)
+ return;
+
+ connector = &amdgpu_connector->base;
+
+ amdgpu_connector->connector_id = connector_id;
+ amdgpu_connector->devices = supported_device;
+ amdgpu_connector->shared_ddc = shared_ddc;
+ amdgpu_connector->connector_object_id = connector_object_id;
+ amdgpu_connector->hpd = *hpd;
+
+ amdgpu_connector->router = *router;
+ if (router->ddc_valid || router->cd_valid) {
+ amdgpu_connector->router_bus = amdgpu_i2c_lookup(adev, &router->i2c_info);
+ if (!amdgpu_connector->router_bus)
+ DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
+ }
+
+ if (is_dp_bridge) {
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ switch (connector_type) {
+ case DRM_MODE_CONNECTOR_VGA:
+ case DRM_MODE_CONNECTOR_DVIA:
+ default:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ break;
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ }
+ break;
+ case DRM_MODE_CONNECTOR_LVDS:
+ case DRM_MODE_CONNECTOR_eDP:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ }
+ } else {
+ switch (connector_type) {
+ case DRM_MODE_CONNECTOR_VGA:
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ /* no HPD on analog connectors */
+ amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+ case DRM_MODE_CONNECTOR_DVIA:
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ /* no HPD on analog connectors */
+ amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_DVID:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
+ subpixel_order = SubPixelHorizontalRGB;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ }
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_DVII)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
+ subpixel_order = SubPixelHorizontalRGB;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ connector->interlace_allowed = true;
+ /* in theory with a DP to VGA converter... */
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_eDP:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_LVDS:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_lvds_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ }
+ }
+
+ if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
+ if (i2c_bus->valid) {
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT;
+ }
+ } else
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+ connector->display_info.subpixel_order = subpixel_order;
+
+ if (has_aux)
+ amdgpu_atombios_dp_aux_init(amdgpu_connector);
+
+ if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector_type == DRM_MODE_CONNECTOR_eDP) {
+ drm_connector_attach_dp_subconnector_property(&amdgpu_connector->base);
+ }
+
+ return;
+
+failed:
+ drm_connector_cleanup(connector);
+ kfree(connector);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
new file mode 100644
index 000000000..61fcef15a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CONNECTORS_H__
+#define __AMDGPU_CONNECTORS_H__
+
+struct edid *amdgpu_connector_edid(struct drm_connector *connector);
+void amdgpu_connector_hotplug(struct drm_connector *connector);
+int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector);
+u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector);
+bool amdgpu_connector_is_dp12_capable(struct drm_connector *connector);
+void
+amdgpu_connector_add(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
new file mode 100644
index 000000000..c0a3afe81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -0,0 +1,1786 @@
+/*
+ * Copyright 2008 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/sync_file.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "amdgpu_cs.h"
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+ struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ if (cs->in.num_chunks == 0)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->adev = adev;
+ p->filp = filp;
+
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (atomic_read(&p->ctx->guilty)) {
+ amdgpu_ctx_put(p->ctx);
+ return -ECANCELED;
+ }
+
+ amdgpu_sync_create(&p->sync);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES);
+ return 0;
+}
+
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
+
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+ unsigned int *num_ibs)
+{
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+ return -EINVAL;
+
+ ++(num_ibs[r]);
+ p->gang_leader_idx = r;
+ return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
+{
+ struct drm_gem_object *gobj;
+ unsigned long size;
+
+ gobj = drm_gem_object_lookup(p->filp, data->handle);
+ if (gobj == NULL)
+ return -EINVAL;
+
+ p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ size = amdgpu_bo_size(p->uf_bo);
+ if (size != PAGE_SIZE || data->offset > (size - 8))
+ return -EINVAL;
+
+ if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+ return -EINVAL;
+
+ *offset = data->offset;
+ return 0;
+}
+
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ struct drm_amdgpu_bo_list_entry *info;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
+ return 0;
+
+error_free:
+ kvfree(info);
+
+ return r;
+}
+
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ uint64_t *chunk_array_user;
+ uint64_t *chunk_array;
+ uint32_t uf_offset = 0;
+ size_t size;
+ int ret;
+ int i;
+
+ chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
+ GFP_KERNEL);
+ if (!chunk_array)
+ return -ENOMEM;
+
+ /* get chunks */
+ chunk_array_user = u64_to_user_ptr(cs->in.chunks);
+ if (copy_from_user(chunk_array, chunk_array_user,
+ sizeof(uint64_t)*cs->in.num_chunks)) {
+ ret = -EFAULT;
+ goto free_chunk;
+ }
+
+ p->nchunks = cs->in.num_chunks;
+ p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+ GFP_KERNEL);
+ if (!p->chunks) {
+ ret = -ENOMEM;
+ goto free_chunk;
+ }
+
+ for (i = 0; i < p->nchunks; i++) {
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk user_chunk;
+ uint32_t __user *cdata;
+
+ chunk_ptr = u64_to_user_ptr(chunk_array[i]);
+ if (copy_from_user(&user_chunk, chunk_ptr,
+ sizeof(struct drm_amdgpu_cs_chunk))) {
+ ret = -EFAULT;
+ i--;
+ goto free_partial_kdata;
+ }
+ p->chunks[i].chunk_id = user_chunk.chunk_id;
+ p->chunks[i].length_dw = user_chunk.length_dw;
+
+ size = p->chunks[i].length_dw;
+ cdata = u64_to_user_ptr(user_chunk.chunk_data);
+
+ p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
+ GFP_KERNEL);
+ if (p->chunks[i].kdata == NULL) {
+ ret = -ENOMEM;
+ i--;
+ goto free_partial_kdata;
+ }
+ size *= sizeof(uint32_t);
+ if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
+ ret = -EFAULT;
+ goto free_partial_kdata;
+ }
+
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
+ switch (p->chunks[i].chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_FENCE:
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ break;
+
+ default:
+ goto free_partial_kdata;
+ }
+ }
+
+ if (!p->gang_size) {
+ ret = -EINVAL;
+ goto free_all_kdata;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+ num_ibs[i], &p->jobs[i]);
+ if (ret)
+ goto free_all_kdata;
+ }
+ p->gang_leader = p->jobs[p->gang_leader_idx];
+
+ if (p->ctx->generation != p->gang_leader->generation) {
+ ret = -ECANCELED;
+ goto free_all_kdata;
+ }
+
+ if (p->uf_bo)
+ p->gang_leader->uf_addr = uf_offset;
+ kvfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
+ return 0;
+
+free_all_kdata:
+ i = p->nchunks - 1;
+free_partial_kdata:
+ for (; i >= 0; i--)
+ kvfree(p->chunks[i].kdata);
+ kvfree(p->chunks);
+ p->chunks = NULL;
+ p->nchunks = 0;
+free_chunk:
+ kvfree(chunk_array);
+
+ return ret;
+}
+
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk,
+ unsigned int *ce_preempt,
+ unsigned int *de_preempt)
+{
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_ring *ring;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ job = p->jobs[r];
+ ring = amdgpu_job_ring(job);
+ ib = &job->ibs[job->num_ibs++];
+
+ /* MM engine doesn't support user fences */
+ if (p->uf_bo && ring->funcs->no_user_fence)
+ return -EINVAL;
+
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ (*ce_preempt)++;
+ else
+ (*de_preempt)++;
+
+ /* Each GFX command submit allows only 1 IB max
+ * preemptible for CE & DE */
+ if (*ce_preempt > 1 || *de_preempt > 1)
+ return -EINVAL;
+ }
+
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+
+ r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+ return 0;
+}
+
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
+ struct dma_fence *fence;
+
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+ amdgpu_ctx_put(ctx);
+
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+ struct drm_sched_fence *s_fence;
+ struct dma_fence *old = fence;
+
+ s_fence = to_drm_sched_fence(fence);
+ fence = dma_fence_get(&s_fence->scheduled);
+ dma_fence_put(old);
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
+{
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ if (r) {
+ DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
+ return r;
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_deps[i].syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_deps[i].syncobj)
+ return -EINVAL;
+ p->post_deps[i].chain = NULL;
+ p->post_deps[i].point = 0;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+ dep->chain = NULL;
+ if (syncobj_deps[i].point) {
+ dep->chain = dma_fence_chain_alloc();
+ if (!dep->chain)
+ return -ENOMEM;
+ }
+
+ dep->syncobj = drm_syncobj_find(p->filp,
+ syncobj_deps[i].handle);
+ if (!dep->syncobj) {
+ dma_fence_chain_free(dep->chain);
+ return -EINVAL;
+ }
+ dep->point = syncobj_deps[i].point;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ int i;
+
+ if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;
+ p->jobs[i]->init_shadow =
+ shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
+{
+ unsigned int ce_preempt = 0, de_preempt = 0;
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ r = amdgpu_cs_p2_dependencies(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_p2_syncobj_in(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_p2_syncobj_out(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ r = amdgpu_cs_p2_shadow(p, chunk);
+ if (r)
+ return r;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Convert microseconds to bytes. */
+static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
+{
+ if (us <= 0 || !adev->mm_stats.log2_max_MBps)
+ return 0;
+
+ /* Since accum_us is incremented by a million per second, just
+ * multiply it by the number of MB/s to get the number of bytes.
+ */
+ return us << adev->mm_stats.log2_max_MBps;
+}
+
+static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
+{
+ if (!adev->mm_stats.log2_max_MBps)
+ return 0;
+
+ return bytes >> adev->mm_stats.log2_max_MBps;
+}
+
+/* Returns how many bytes TTM can move right now. If no bytes can be moved,
+ * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
+ * which means it can go over the threshold once. If that happens, the driver
+ * will be in debt and no other buffer migrations can be done until that debt
+ * is repaid.
+ *
+ * This approach allows moving a buffer of any size (it's important to allow
+ * that).
+ *
+ * The currency is simply time in microseconds and it increases as the clock
+ * ticks. The accumulated microseconds (us) are converted to bytes and
+ * returned.
+ */
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+ u64 *max_bytes,
+ u64 *max_vis_bytes)
+{
+ s64 time_us, increment_us;
+ u64 free_vram, total_vram, used_vram;
+ /* Allow a maximum of 200 accumulated ms. This is basically per-IB
+ * throttling.
+ *
+ * It means that in order to get full max MBps, at least 5 IBs per
+ * second must be submitted and not more than 200ms apart from each
+ * other.
+ */
+ const s64 us_upper_bound = 200000;
+
+ if (!adev->mm_stats.log2_max_MBps) {
+ *max_bytes = 0;
+ *max_vis_bytes = 0;
+ return;
+ }
+
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
+ used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+ free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
+
+ spin_lock(&adev->mm_stats.lock);
+
+ /* Increase the amount of accumulated us. */
+ time_us = ktime_to_us(ktime_get());
+ increment_us = time_us - adev->mm_stats.last_update_us;
+ adev->mm_stats.last_update_us = time_us;
+ adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
+ us_upper_bound);
+
+ /* This prevents the short period of low performance when the VRAM
+ * usage is low and the driver is in debt or doesn't have enough
+ * accumulated us to fill VRAM quickly.
+ *
+ * The situation can occur in these cases:
+ * - a lot of VRAM is freed by userspace
+ * - the presence of a big buffer causes a lot of evictions
+ * (solution: split buffers into smaller ones)
+ *
+ * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
+ * accum_us to a positive number.
+ */
+ if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
+ s64 min_us;
+
+ /* Be more aggressive on dGPUs. Try to fill a portion of free
+ * VRAM now.
+ */
+ if (!(adev->flags & AMD_IS_APU))
+ min_us = bytes_to_us(adev, free_vram / 4);
+ else
+ min_us = 0; /* Reset accum_us on APUs. */
+
+ adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+ }
+
+ /* This is set to 0 if the driver is in debt to disallow (optional)
+ * buffer moves.
+ */
+ *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+ /* Do the same for visible VRAM if half of it is free */
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
+ u64 total_vis_vram = adev->gmc.visible_vram_size;
+ u64 used_vis_vram =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+
+ if (used_vis_vram < total_vis_vram) {
+ u64 free_vis_vram = total_vis_vram - used_vis_vram;
+
+ adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+ increment_us, us_upper_bound);
+
+ if (free_vis_vram >= total_vis_vram / 2)
+ adev->mm_stats.accum_us_vis =
+ max(bytes_to_us(adev, free_vis_vram / 2),
+ adev->mm_stats.accum_us_vis);
+ }
+
+ *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+ } else {
+ *max_vis_bytes = 0;
+ }
+
+ spin_unlock(&adev->mm_stats.lock);
+}
+
+/* Report how many bytes have really been moved for the last command
+ * submission. This can result in a debt that can stop buffer migrations
+ * temporarily.
+ */
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes)
+{
+ spin_lock(&adev->mm_stats.lock);
+ adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+ adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
+ spin_unlock(&adev->mm_stats.lock);
+}
+
+static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_cs_parser *p = param;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .resv = bo->tbo.base.resv
+ };
+ uint32_t domain;
+ int r;
+
+ if (bo->tbo.pin_count)
+ return 0;
+
+ /* Don't move this buffer if we have depleted our allowance
+ * to move it. Don't move anything if the threshold is zero.
+ */
+ if (p->bytes_moved < p->bytes_moved_threshold &&
+ (!bo->tbo.base.dma_buf ||
+ list_empty(&bo->tbo.base.dma_buf->attachments))) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+ /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+ * visible VRAM if we've depleted our allowance to do
+ * that.
+ */
+ if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+ domain = bo->preferred_domains;
+ else
+ domain = bo->allowed_domains;
+ } else {
+ domain = bo->preferred_domains;
+ }
+ } else {
+ domain = bo->allowed_domains;
+ }
+
+retry:
+ amdgpu_bo_placement_from_domain(bo, domain);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ p->bytes_moved += ctx.bytes_moved;
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo))
+ p->bytes_moved_vis += ctx.bytes_moved;
+
+ if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+ domain = bo->allowed_domains;
+ goto retry;
+ }
+
+ return r;
+}
+
+static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ unsigned int i;
+ int r;
+
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (cs->in.bo_list_handle) {
+ if (p->bo_list)
+ return -EINVAL;
+
+ r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+ &p->bo_list);
+ if (r)
+ return r;
+ } else if (!p->bo_list) {
+ /* Create a empty bo_list when no handle is provided */
+ r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+ &p->bo_list);
+ if (r)
+ return r;
+ }
+
+ mutex_lock(&p->bo_list->bo_list_mutex);
+
+ /* Get userptr backing pages. If pages are updated after registered
+ * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+ * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+ */
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ bool userpage_invalidated = false;
+ struct amdgpu_bo *bo = e->bo;
+ int i;
+
+ e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!e->user_pages) {
+ DRM_ERROR("kvmalloc_array failure\n");
+ r = -ENOMEM;
+ goto out_free_user_pages;
+ }
+
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
+ if (r) {
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ goto out_free_user_pages;
+ }
+
+ for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+ if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ userpage_invalidated = true;
+ break;
+ }
+ }
+ e->user_invalidated = userpage_invalidated;
+ }
+
+ drm_exec_until_all_locked(&p->exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ /* One fence for TTM and one for each CS job */
+ r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+ }
+
+ if (p->uf_bo) {
+ r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
+ }
+
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct mm_struct *usermm;
+
+ usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+ if (usermm && usermm != current->mm) {
+ r = -EPERM;
+ goto out_free_user_pages;
+ }
+
+ if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+ e->user_invalidated && e->user_pages) {
+ amdgpu_bo_placement_from_domain(e->bo,
+ AMDGPU_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+ &ctx);
+ if (r)
+ goto out_free_user_pages;
+
+ amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+ e->user_pages);
+ }
+
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ }
+
+ amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+ &p->bytes_moved_vis_threshold);
+ p->bytes_moved = 0;
+ p->bytes_moved_vis = 0;
+
+ r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
+ amdgpu_cs_bo_validate, p);
+ if (r) {
+ DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
+ goto out_free_user_pages;
+ }
+
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
+
+ if (p->uf_bo) {
+ r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
+ }
+
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
+
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
+
+out_free_user_pages:
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = e->bo;
+
+ if (!e->user_pages)
+ continue;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ e->range = NULL;
+ }
+ mutex_unlock(&p->bo_list->bo_list_mutex);
+ return r;
+}
+
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
+{
+ int i, j;
+
+ if (!trace_amdgpu_cs_enabled())
+ return;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
+
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
+ }
+}
+
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
+ int r;
+
+ /* Only for UVD/VCE VM emulation */
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
+
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
+
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ DRM_ERROR("IB va_start is invalid\n");
+ return r;
+ }
+
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
+
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r)
+ return r;
+
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
+
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
+ struct amdgpu_device *adev = p->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
+ struct amdgpu_bo_va *bo_va;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+ if (r)
+ return r;
+
+ if (fpriv->csa_va) {
+ bo_va = fpriv->csa_va;
+ BUG_ON(!bo_va);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ if (r)
+ return r;
+ }
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ bo_va = e->bo_va;
+ if (bo_va == NULL)
+ continue;
+
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vm_handle_moved(adev, vm);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, vm->last_update);
+ if (r)
+ return r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
+
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
+
+ if (amdgpu_vm_debug) {
+ /* Invalidate all BOs to test for userspace bugs */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = e->bo;
+
+ /* ignore duplicates */
+ if (!bo)
+ continue;
+
+ amdgpu_vm_bo_invalidate(adev, bo, false);
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
+ struct drm_gem_object *obj;
+ struct dma_fence *fence;
+ unsigned long index;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
+ if (r) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+ return r;
+ }
+
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
+
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
+ &fpriv->vm);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
+ if (r)
+ return r;
+ }
+
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+ /*
+ * When we have an dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched) {
+ dma_fence_put(fence);
+ continue;
+ }
+
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+ int i;
+
+ for (i = 0; i < p->num_post_deps; ++i) {
+ if (p->post_deps[i].chain && p->post_deps[i].point) {
+ drm_syncobj_add_point(p->post_deps[i].syncobj,
+ p->post_deps[i].chain,
+ p->fence, p->post_deps[i].point);
+ p->post_deps[i].chain = NULL;
+ } else {
+ drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+ p->fence);
+ }
+ }
+}
+
+static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *leader = p->gang_leader;
+ struct amdgpu_bo_list_entry *e;
+ struct drm_gem_object *gobj;
+ unsigned long index;
+ unsigned int i;
+ uint64_t seq;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
+
+ for (i = 0; i < p->gang_size; ++i) {
+ struct dma_fence *fence;
+
+ if (p->jobs[i] == leader)
+ continue;
+
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&leader->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ return r;
+ }
+ }
+
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
+
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
+ */
+ mutex_lock(&p->adev->notifier_lock);
+
+ /* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+ * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+ */
+ r = 0;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
+ e->range);
+ e->range = NULL;
+ }
+ if (r) {
+ r = -EAGAIN;
+ mutex_unlock(&p->adev->notifier_lock);
+ return r;
+ }
+
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+ ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
+
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < p->gang_size; ++i) {
+ if (p->jobs[i] == leader)
+ continue;
+
+ dma_resv_add_fence(gobj->resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
+
+ /* The gang leader as remembered as writer */
+ dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
+ }
+
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
+ p->fence);
+ amdgpu_cs_post_dependencies(p);
+
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ !p->ctx->preamble_presented) {
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ p->ctx->preamble_presented = true;
+ }
+
+ cs->out.handle = seq;
+ leader->uf_sequence = seq;
+
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
+
+ amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
+ mutex_unlock(&p->adev->notifier_lock);
+ mutex_unlock(&p->bo_list->bo_list_mutex);
+ return 0;
+}
+
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(&parser->sync);
+ drm_exec_fini(&parser->exec);
+
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
+
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
+ amdgpu_bo_unref(&parser->uf_bo);
+}
+
+int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_cs_parser parser;
+ int r;
+
+ if (amdgpu_ras_intr_triggered())
+ return -EHWPOISON;
+
+ if (!adev->accel_working)
+ return -EBUSY;
+
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
+ if (r) {
+ if (printk_ratelimit())
+ DRM_ERROR("Failed to initialize parser %d!\n", r);
+ return r;
+ }
+
+ r = amdgpu_cs_pass1(&parser, data);
+ if (r)
+ goto error_fini;
+
+ r = amdgpu_cs_pass2(&parser);
+ if (r)
+ goto error_fini;
+
+ r = amdgpu_cs_parser_bos(&parser, data);
+ if (r) {
+ if (r == -ENOMEM)
+ DRM_ERROR("Not enough memory for command submission!\n");
+ else if (r != -ERESTARTSYS && r != -EAGAIN)
+ DRM_DEBUG("Failed to process the buffer list %d!\n", r);
+ goto error_fini;
+ }
+
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_vm_handling(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto error_backoff;
+
+ trace_amdgpu_cs_ibs(&parser);
+
+ r = amdgpu_cs_submit(&parser, data);
+ if (r)
+ goto error_backoff;
+
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
+
+error_backoff:
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
+ return r;
+}
+
+/**
+ * amdgpu_cs_wait_ioctl - wait for a command submission to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ *
+ * Wait for the command submission identified by handle to finish.
+ */
+int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_wait_cs *wait = data;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+ struct drm_sched_entity *entity;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
+ long r;
+
+ ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+ wait->in.ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
+ if (IS_ERR(fence))
+ r = PTR_ERR(fence);
+ else if (fence) {
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+ dma_fence_put(fence);
+ } else
+ r = 1;
+
+ amdgpu_ctx_put(ctx);
+ if (r < 0)
+ return r;
+
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r == 0);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @user: drm_amdgpu_fence copied from user space
+ */
+static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_fence *user)
+{
+ struct drm_sched_entity *entity;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
+ int r;
+
+ ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
+ if (ctx == NULL)
+ return ERR_PTR(-EINVAL);
+
+ r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+ user->ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return ERR_PTR(r);
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
+ amdgpu_ctx_put(ctx);
+
+ return fence;
+}
+
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union drm_amdgpu_fence_to_handle *info = data;
+ struct dma_fence *fence;
+ struct drm_syncobj *syncobj;
+ struct sync_file *sync_file;
+ int fd, r;
+
+ fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ if (!fence)
+ fence = dma_fence_get_stub();
+
+ switch (info->in.what) {
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
+ r = drm_syncobj_create(&syncobj, 0, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
+ drm_syncobj_put(syncobj);
+ return r;
+
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
+ r = drm_syncobj_create(&syncobj, 0, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
+ drm_syncobj_put(syncobj);
+ return r;
+
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ dma_fence_put(fence);
+ return fd;
+ }
+
+ sync_file = sync_file_create(fence);
+ dma_fence_put(fence);
+ if (!sync_file) {
+ put_unused_fd(fd);
+ return -ENOMEM;
+ }
+
+ fd_install(fd, sync_file->file);
+ info->out.handle = fd;
+ return 0;
+
+ default:
+ dma_fence_put(fence);
+ return -EINVAL;
+ }
+}
+
+/**
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ uint32_t fence_count = wait->in.fence_count;
+ unsigned int i;
+ long r = 1;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+
+ dma_fence_put(fence);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cs_wait_any_fence - wait on any fence to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+ uint32_t fence_count = wait->in.fence_count;
+ uint32_t first = ~0;
+ struct dma_fence **array;
+ unsigned int i;
+ long r;
+
+ /* Prepare the fence array */
+ array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
+
+ if (array == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
+ goto err_free_fence_array;
+ } else if (fence) {
+ array[i] = fence;
+ } else { /* NULL, the fence has been already signaled */
+ r = 1;
+ first = i;
+ goto out;
+ }
+ }
+
+ r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
+ &first);
+ if (r < 0)
+ goto err_free_fence_array;
+
+out:
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+ wait->out.first_signaled = first;
+
+ if (first < fence_count && array[first])
+ r = array[first]->error;
+ else
+ r = 0;
+
+err_free_fence_array:
+ for (i = 0; i < fence_count; i++)
+ dma_fence_put(array[i]);
+ kfree(array);
+
+ return r;
+}
+
+/**
+ * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ */
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union drm_amdgpu_wait_fences *wait = data;
+ uint32_t fence_count = wait->in.fence_count;
+ struct drm_amdgpu_fence *fences_user;
+ struct drm_amdgpu_fence *fences;
+ int r;
+
+ /* Get the fences from userspace */
+ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
+ GFP_KERNEL);
+ if (fences == NULL)
+ return -ENOMEM;
+
+ fences_user = u64_to_user_ptr(wait->in.fences);
+ if (copy_from_user(fences, fences_user,
+ sizeof(struct drm_amdgpu_fence) * fence_count)) {
+ r = -EFAULT;
+ goto err_free_fences;
+ }
+
+ if (wait->in.wait_all)
+ r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
+ else
+ r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
+
+err_free_fences:
+ kfree(fences);
+
+ return r;
+}
+
+/**
+ * amdgpu_cs_find_mapping - find bo_va for VM address
+ *
+ * @parser: command submission parser context
+ * @addr: VM address
+ * @bo: resulting BO of the mapping found
+ * @map: Placeholder to return found BO mapping
+ *
+ * Search the buffer objects in the command submission context for a certain
+ * virtual memory address. Returns allocation structure when found, NULL
+ * otherwise.
+ */
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **map)
+{
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+ return -EINVAL;
+
+ *bo = mapping->bo_va->base.bo;
+ *map = mapping;
+
+ /* Double check that the BO is reserved by this CS */
+ if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
+ return -EINVAL;
+
+ if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
new file mode 100644
index 000000000..39c33ad10
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CS_H__
+#define __AMDGPU_CS_H__
+
+#include <linux/ww_mutex.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_CS_GANG_SIZE 4
+
+struct amdgpu_bo_va_mapping;
+
+struct amdgpu_cs_chunk {
+ uint32_t chunk_id;
+ uint32_t length_dw;
+ void *kdata;
+};
+
+struct amdgpu_cs_post_dep {
+ struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
+ u64 point;
+};
+
+struct amdgpu_cs_parser {
+ struct amdgpu_device *adev;
+ struct drm_file *filp;
+ struct amdgpu_ctx *ctx;
+
+ /* chunks */
+ unsigned nchunks;
+ struct amdgpu_cs_chunk *chunks;
+
+ /* scheduler job objects */
+ unsigned int gang_size;
+ unsigned int gang_leader_idx;
+ struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *gang_leader;
+
+ /* buffer objects */
+ struct drm_exec exec;
+ struct amdgpu_bo_list *bo_list;
+ struct amdgpu_mn *mn;
+ struct dma_fence *fence;
+ uint64_t bytes_moved_threshold;
+ uint64_t bytes_moved_vis_threshold;
+ uint64_t bytes_moved;
+ uint64_t bytes_moved_vis;
+
+ /* user fence */
+ struct amdgpu_bo *uf_bo;
+
+ unsigned num_post_deps;
+ struct amdgpu_cs_post_dep *post_deps;
+
+ struct amdgpu_sync sync;
+};
+
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **mapping);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
new file mode 100644
index 000000000..720011019
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+
+ * * Author: Monk.liu@amd.com
+ */
+
+#include <drm/drm_exec.h>
+
+#include "amdgpu.h"
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+ uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+ addr -= AMDGPU_VA_RESERVED_SIZE;
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
+}
+
+int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
+ u32 domain, uint32_t size)
+{
+ void *ptr;
+
+ amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ domain, bo,
+ NULL, &ptr);
+ if (!*bo)
+ return -ENOMEM;
+
+ memset(ptr, 0, size);
+ adev->virt.csa_cpu_addr = ptr;
+ return 0;
+}
+
+void amdgpu_free_static_csa(struct amdgpu_bo **bo)
+{
+ amdgpu_bo_free_kernel(bo, NULL, NULL);
+}
+
+/*
+ * amdgpu_map_static_csa should be called during amdgpu_vm_init
+ * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
+ * submission of GFX should use this virtual address within META_DATA init
+ * package to support SRIOV gfx preemption.
+ */
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
+ uint64_t csa_addr, uint32_t size)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!*bo_va) {
+ r = -ENOMEM;
+ goto error;
+ }
+
+ r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
+ AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+ AMDGPU_PTE_EXECUTABLE);
+
+ if (r) {
+ DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+ if (r) {
+ DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
new file mode 100644
index 000000000..7dfc1f201
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Monk.liu@amd.com
+ */
+
+#ifndef AMDGPU_CSA_MANAGER_H
+#define AMDGPU_CSA_MANAGER_H
+
+#define AMDGPU_CSA_SIZE (128 * 1024)
+
+uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
+int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
+ u32 domain, uint32_t size);
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
+ uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr);
+void amdgpu_free_static_csa(struct amdgpu_bo **bo);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
new file mode 100644
index 000000000..76549c2cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -0,0 +1,994 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: monk liu <monk.liu@amd.com>
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+ container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = 1,
+ [AMDGPU_HW_IP_COMPUTE] = 4,
+ [AMDGPU_HW_IP_DMA] = 2,
+ [AMDGPU_HW_IP_UVD] = 1,
+ [AMDGPU_HW_IP_VCE] = 1,
+ [AMDGPU_HW_IP_UVD_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_DEC] = 1,
+ [AMDGPU_HW_IP_VCN_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_JPEG] = 1,
+};
+
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
+{
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ case AMDGPU_CTX_PRIORITY_LOW:
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return true;
+ default:
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
+ return false;
+ }
+}
+
+static enum drm_sched_priority
+amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
+{
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ return DRM_SCHED_PRIORITY_MIN;
+
+ case AMDGPU_CTX_PRIORITY_LOW:
+ return DRM_SCHED_PRIORITY_MIN;
+
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+
+ /* This should not happen as we sanitized userspace provided priority
+ * already, WARN if this happens.
+ */
+ default:
+ WARN(1, "Invalid context priority %d\n", ctx_prio);
+ return DRM_SCHED_PRIORITY_NORMAL;
+ }
+
+}
+
+static int amdgpu_ctx_priority_permit(struct drm_file *filp,
+ int32_t priority)
+{
+ /* NORMAL and below are accessible by everyone */
+ if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
+ return 0;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ if (drm_is_current_master(filp))
+ return 0;
+
+ return -EACCES;
+}
+
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
+{
+ switch (prio) {
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return AMDGPU_GFX_PIPE_PRIO_HIGH;
+ default:
+ return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ }
+}
+
+static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
+{
+ switch (prio) {
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return AMDGPU_RING_PRIO_1;
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ unsigned int hw_prio;
+ int32_t ctx_prio;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+
+ switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
+ break;
+ case AMDGPU_HW_IP_VCE:
+ case AMDGPU_HW_IP_VCN_ENC:
+ hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
+ break;
+ default:
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+ break;
+ }
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+ return hw_prio;
+}
+
+/* Calculate the time spend on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+ struct drm_sched_fence *s_fence;
+
+ if (!fence)
+ return ns_to_ktime(0);
+
+ /* When the fence is not even scheduled it can't have spend time */
+ s_fence = to_drm_sched_fence(fence);
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+ return ns_to_ktime(0);
+
+ /* When it is still running account how much already spend */
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+ return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+ return ktime_sub(s_fence->finished.timestamp,
+ s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *centity)
+{
+ ktime_t res = ns_to_ktime(0);
+ uint32_t i;
+
+ spin_lock(&ctx->ring_lock);
+ for (i = 0; i < amdgpu_sched_jobs; i++) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+ }
+ spin_unlock(&ctx->ring_lock);
+ return res;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+ const u32 ring)
+{
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ struct amdgpu_ctx_entity *entity;
+ enum drm_sched_priority drm_prio;
+ unsigned int hw_prio, num_scheds;
+ int32_t ctx_prio;
+ int r;
+
+ entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ entity->hw_ip = hw_ip;
+ entity->sequence = 1;
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+ drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+
+ if (!(adev)->xcp_mgr) {
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ } else {
+ struct amdgpu_fpriv *fpriv;
+
+ fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
+ &num_scheds, &scheds);
+ if (r)
+ goto cleanup_entity;
+ }
+
+ /* disable load balance if the hw engine retains context among dependent jobs */
+ if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+ hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+ hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+ hw_ip == AMDGPU_HW_IP_UVD) {
+ sched = drm_sched_pick_best(scheds, num_scheds);
+ scheds = &sched;
+ num_scheds = 1;
+ }
+
+ r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ /* It's not an error if we fail to install the new entity */
+ if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+ goto cleanup_entity;
+
+ return 0;
+
+cleanup_entity:
+ drm_sched_entity_fini(&entity->entity);
+
+error_free_entity:
+ kfree(entity);
+
+ return r;
+}
+
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ ktime_t res = ns_to_ktime(0);
+ int i;
+
+ if (!entity)
+ return res;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+ dma_fence_put(entity->fences[i]);
+ }
+
+ amdgpu_xcp_release_sched(adev, entity);
+
+ kfree(entity);
+ return res;
+}
+
+static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 *stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level current_level;
+
+ current_level = amdgpu_dpm_get_performance_level(adev);
+
+ switch (current_level) {
+ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
+ break;
+ default:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+ struct drm_file *filp, struct amdgpu_ctx *ctx)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ u32 current_stable_pstate;
+ int r;
+
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ kref_init(&ctx->refcount);
+ ctx->mgr = mgr;
+ spin_lock_init(&ctx->ring_lock);
+
+ ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
+ ctx->init_priority = priority;
+ ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r)
+ return r;
+
+ if (mgr->adev->pm.stable_pstate_ctx)
+ ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
+ else
+ ctx->stable_pstate = current_stable_pstate;
+
+ ctx->ctx_mgr = &(fpriv->ctx_mgr);
+ return 0;
+}
+
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level level;
+ u32 current_stable_pstate;
+ int r;
+
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
+ r = -EBUSY;
+ goto done;
+ }
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || (stable_pstate == current_stable_pstate))
+ goto done;
+
+ switch (stable_pstate) {
+ case AMDGPU_CTX_STABLE_PSTATE_NONE:
+ level = AMD_DPM_FORCED_LEVEL_AUTO;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_PEAK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
+ break;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+ r = amdgpu_dpm_force_performance_level(adev, level);
+
+ if (level == AMD_DPM_FORCED_LEVEL_AUTO)
+ adev->pm.stable_pstate_ctx = NULL;
+ else
+ adev->pm.stable_pstate_ctx = ctx;
+done:
+ mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
+
+ return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+ struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+ struct amdgpu_device *adev = mgr->adev;
+ unsigned i, j, idx;
+
+ if (!adev)
+ return;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ ktime_t spend;
+
+ spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
+ atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
+ }
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ drm_dev_exit(idx);
+ }
+
+ kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity)
+{
+ int r;
+ struct drm_sched_entity *ctx_entity;
+
+ if (hw_ip >= AMDGPU_HW_IP_NUM) {
+ DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ return -EINVAL;
+ }
+
+ /* Right now all IPs have only one instance - multiple rings. */
+ if (instance != 0) {
+ DRM_DEBUG("invalid ip instance: %d\n", instance);
+ return -EINVAL;
+ }
+
+ if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+ DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ return -EINVAL;
+ }
+
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ ctx_entity = &ctx->entities[hw_ip][ring]->entity;
+ r = drm_sched_entity_error(ctx_entity);
+ if (r) {
+ DRM_DEBUG("error entity %p\n", ctx_entity);
+ return r;
+ }
+
+ *entity = ctx_entity;
+ return 0;
+}
+
+static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *filp,
+ int32_t priority,
+ uint32_t *id)
+{
+ struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+ struct amdgpu_ctx *ctx;
+ int r;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ mutex_lock(&mgr->lock);
+ r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
+ if (r < 0) {
+ mutex_unlock(&mgr->lock);
+ kfree(ctx);
+ return r;
+ }
+
+ *id = (uint32_t)r;
+ r = amdgpu_ctx_init(mgr, priority, filp, ctx);
+ if (r) {
+ idr_remove(&mgr->ctx_handles, *id);
+ *id = 0;
+ kfree(ctx);
+ }
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
+static void amdgpu_ctx_do_release(struct kref *ref)
+{
+ struct amdgpu_ctx *ctx;
+ u32 i, j;
+
+ ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
+
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
+
+ amdgpu_ctx_fini(ref);
+}
+
+static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
+{
+ struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+ struct amdgpu_ctx *ctx;
+
+ mutex_lock(&mgr->lock);
+ ctx = idr_remove(&mgr->ctx_handles, id);
+ if (ctx)
+ kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ mutex_unlock(&mgr->lock);
+ return ctx ? 0 : -EINVAL;
+}
+
+static int amdgpu_ctx_query(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ union drm_amdgpu_ctx_out *out)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ unsigned reset_counter;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ /* TODO: these two are always zero */
+ out->state.flags = 0x0;
+ out->state.hangs = 0x0;
+
+ /* determine if a GPU reset has occured since the last call */
+ reset_counter = atomic_read(&adev->gpu_reset_counter);
+ /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
+ if (ctx->reset_counter_query == reset_counter)
+ out->state.reset_status = AMDGPU_CTX_NO_RESET;
+ else
+ out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
+ ctx->reset_counter_query = reset_counter;
+
+ mutex_unlock(&mgr->lock);
+ return 0;
+}
+
+#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
+
+static int amdgpu_ctx_query2(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ union drm_amdgpu_ctx_out *out)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ out->state.flags = 0x0;
+ out->state.hangs = 0x0;
+
+ if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
+
+ if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
+
+ if (atomic_read(&ctx->guilty))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
+
+ if (amdgpu_in_reset(adev))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
+ if (adev->ras_enabled && con) {
+ /* Return the cached values in O(1),
+ * and schedule delayed work to cache
+ * new vaues.
+ */
+ int ce_count, ue_count;
+
+ ce_count = atomic_read(&con->ras_ce_count);
+ ue_count = atomic_read(&con->ras_ue_count);
+
+ if (ce_count != ctx->ras_counter_ce) {
+ ctx->ras_counter_ce = ce_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+ }
+
+ if (ue_count != ctx->ras_counter_ue) {
+ ctx->ras_counter_ue = ue_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+ }
+
+ schedule_delayed_work(&con->ras_counte_delay_work,
+ msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
+ }
+
+ mutex_unlock(&mgr->lock);
+ return 0;
+}
+
+static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ bool set, u32 *stable_pstate)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ int r;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ if (set)
+ r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
+ else
+ r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
+
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ int r;
+ uint32_t id, stable_pstate;
+ int32_t priority;
+
+ union drm_amdgpu_ctx *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ id = args->in.ctx_id;
+ priority = args->in.priority;
+
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field, result
+ * in the priority being set to NORMAL.
+ */
+ if (!amdgpu_ctx_priority_is_valid(priority))
+ priority = AMDGPU_CTX_PRIORITY_NORMAL;
+
+ switch (args->in.op) {
+ case AMDGPU_CTX_OP_ALLOC_CTX:
+ r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
+ args->out.alloc.ctx_id = id;
+ break;
+ case AMDGPU_CTX_OP_FREE_CTX:
+ r = amdgpu_ctx_free(fpriv, id);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE:
+ r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE2:
+ r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
+ if (!r)
+ args->out.pstate.flags = stable_pstate;
+ break;
+ case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
+ if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
+ return -EINVAL;
+ stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
+ if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+
+ if (!fpriv)
+ return NULL;
+
+ mgr = &fpriv->ctx_mgr;
+
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (ctx)
+ kref_get(&ctx->refcount);
+ mutex_unlock(&mgr->lock);
+ return ctx;
+}
+
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
+{
+ if (ctx == NULL)
+ return -EINVAL;
+
+ kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ return 0;
+}
+
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ uint64_t seq = centity->sequence;
+ struct dma_fence *other = NULL;
+ unsigned idx = 0;
+
+ idx = seq & (amdgpu_sched_jobs - 1);
+ other = centity->fences[idx];
+ WARN_ON(other && !dma_fence_is_signaled(other));
+
+ dma_fence_get(fence);
+
+ spin_lock(&ctx->ring_lock);
+ centity->fences[idx] = fence;
+ centity->sequence++;
+ spin_unlock(&ctx->ring_lock);
+
+ atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+ &ctx->mgr->time_spend[centity->hw_ip]);
+
+ dma_fence_put(other);
+ return seq;
+}
+
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ uint64_t seq)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ struct dma_fence *fence;
+
+ spin_lock(&ctx->ring_lock);
+
+ if (seq == ~0ull)
+ seq = centity->sequence - 1;
+
+ if (seq >= centity->sequence) {
+ spin_unlock(&ctx->ring_lock);
+ return ERR_PTR(-EINVAL);
+ }
+
+
+ if (seq + amdgpu_sched_jobs < centity->sequence) {
+ spin_unlock(&ctx->ring_lock);
+ return NULL;
+ }
+
+ fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
+ spin_unlock(&ctx->ring_lock);
+
+ return fence;
+}
+
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *aentity,
+ int hw_ip,
+ int32_t priority)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ unsigned int hw_prio;
+ struct drm_gpu_scheduler **scheds = NULL;
+ unsigned num_scheds;
+
+ /* set sw priority */
+ drm_sched_entity_set_priority(&aentity->entity,
+ amdgpu_ctx_to_drm_sched_prio(priority));
+
+ /* set hw priority */
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+ hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ drm_sched_entity_modify_sched(&aentity->entity, scheds,
+ num_scheds);
+ }
+}
+
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+ int32_t priority)
+{
+ int32_t ctx_prio;
+ unsigned i, j;
+
+ ctx->override_priority = priority;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
+
+ amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+ i, ctx_prio);
+ }
+ }
+}
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ struct dma_fence *other;
+ unsigned idx;
+ long r;
+
+ spin_lock(&ctx->ring_lock);
+ idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ other = dma_fence_get(centity->fences[idx]);
+ spin_unlock(&ctx->ring_lock);
+
+ if (!other)
+ return 0;
+
+ r = dma_fence_wait(other, true);
+ if (r < 0 && r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+ dma_fence_put(other);
+ return r;
+}
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev)
+{
+ unsigned int i;
+
+ mgr->adev = adev;
+ mutex_init(&mgr->lock);
+ idr_init_base(&mgr->ctx_handles, 1);
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ atomic64_set(&mgr->time_spend[i], 0);
+}
+
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i, j;
+
+ idp = &mgr->ctx_handles;
+
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(idp, ctx, id) {
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
+ }
+ }
+ mutex_unlock(&mgr->lock);
+ return timeout;
+}
+
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i, j;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+ if (kref_read(&ctx->refcount) != 1) {
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ continue;
+ }
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
+ }
+}
+
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id;
+
+ amdgpu_ctx_mgr_entity_fini(mgr);
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+ if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ }
+
+ idr_destroy(&mgr->ctx_handles);
+ mutex_destroy(&mgr->lock);
+}
+
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM])
+{
+ struct amdgpu_ctx *ctx;
+ unsigned int hw_ip, i;
+ uint32_t id;
+
+ /*
+ * This is a little bit racy because it can be that a ctx or a fence are
+ * destroyed just in the moment we try to account them. But that is ok
+ * since exactly that case is explicitely allowed by the interface.
+ */
+ mutex_lock(&mgr->lock);
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
+
+ usage[hw_ip] = ns_to_ktime(ns);
+ }
+
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+ struct amdgpu_ctx_entity *centity;
+ ktime_t spend;
+
+ centity = ctx->entities[hw_ip][i];
+ if (!centity)
+ continue;
+ spend = amdgpu_ctx_entity_time(ctx, centity);
+ usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+ }
+ }
+ }
+ mutex_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
new file mode 100644
index 000000000..85376baaa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CTX_H__
+#define __AMDGPU_CTX_H__
+
+#include <linux/ktime.h>
+#include <linux/types.h>
+
+#include "amdgpu_ring.h"
+
+struct drm_device;
+struct drm_file;
+struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
+
+#define AMDGPU_MAX_ENTITY_NUM 4
+
+struct amdgpu_ctx_entity {
+ uint32_t hw_ip;
+ uint64_t sequence;
+ struct drm_sched_entity entity;
+ struct dma_fence *fences[];
+};
+
+struct amdgpu_ctx {
+ struct kref refcount;
+ struct amdgpu_ctx_mgr *mgr;
+ unsigned reset_counter;
+ unsigned reset_counter_query;
+ uint64_t generation;
+ spinlock_t ring_lock;
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
+ bool preamble_presented;
+ int32_t init_priority;
+ int32_t override_priority;
+ atomic_t guilty;
+ unsigned long ras_counter_ce;
+ unsigned long ras_counter_ue;
+ uint32_t stable_pstate;
+ struct amdgpu_ctx_mgr *ctx_mgr;
+};
+
+struct amdgpu_ctx_mgr {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ /* protected by lock */
+ struct idr ctx_handles;
+ atomic64_t time_spend[AMDGPU_HW_IP_NUM];
+};
+
+extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence);
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ uint64_t seq);
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int32_t ctx_prio);
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity);
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM]);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
new file mode 100644
index 000000000..418ff7cd6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -0,0 +1,2213 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_dm_debugfs.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_rap.h"
+#include "amdgpu_securedisplay.h"
+#include "amdgpu_fw_attestation.h"
+#include "amdgpu_umr.h"
+
+#include "amdgpu_reset.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos: Offset to seek to
+ *
+ * This debugfs entry has special meaning on the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62: Indicates a GRBM bank switch is needed
+ * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
+ * zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23: Indicates that the PM power gating lock should be held
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
+ char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+ bool pm_pg_lock, use_bank, use_ring;
+ unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
+
+ pm_pg_lock = use_bank = use_ring = false;
+ instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
+
+ if (size & 0x3 || *pos & 0x3 ||
+ ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
+ return -EINVAL;
+
+ /* are we reading registers for which a PG lock is necessary? */
+ pm_pg_lock = (*pos >> 23) & 1;
+
+ if (*pos & (1ULL << 62)) {
+ se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
+ sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
+ instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
+
+ if (se_bank == 0x3FF)
+ se_bank = 0xFFFFFFFF;
+ if (sh_bank == 0x3FF)
+ sh_bank = 0xFFFFFFFF;
+ if (instance_bank == 0x3FF)
+ instance_bank = 0xFFFFFFFF;
+ use_bank = true;
+ } else if (*pos & (1ULL << 61)) {
+
+ me = (*pos & GENMASK_ULL(33, 24)) >> 24;
+ pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
+ queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
+ vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
+
+ use_ring = true;
+ } else {
+ use_bank = use_ring = false;
+ }
+
+ *pos &= (1UL << 22) - 1;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ if (use_bank) {
+ if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+ (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se_bank,
+ sh_bank, instance_bank, 0);
+ } else if (use_ring) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
+ }
+
+ if (pm_pg_lock)
+ mutex_lock(&adev->pm.mutex);
+
+ while (size) {
+ uint32_t value;
+
+ if (read) {
+ value = RREG32(*pos >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ } else {
+ r = get_user(value, (uint32_t *)buf);
+ if (!r)
+ amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
+ }
+ if (r) {
+ result = r;
+ goto end;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+end:
+ if (use_bank) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ } else if (use_ring) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ if (pm_pg_lock)
+ mutex_unlock(&adev->pm.mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+/*
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
+static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
+}
+
+/*
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
+static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
+}
+
+static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 offset, size_t size, int write_en)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t value;
+
+ if (size & 0x3 || offset & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ mutex_lock(&rd->lock);
+
+ if (rd->id.use_grbm) {
+ if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
+ (rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ mutex_unlock(&rd->lock);
+ return -EINVAL;
+ }
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
+ rd->id.grbm.sh,
+ rd->id.grbm.instance, rd->id.xcc_id);
+ }
+
+ if (rd->id.use_srbm) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
+ rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_lock(&adev->pm.mutex);
+
+ while (size) {
+ if (!write_en) {
+ value = RREG32(offset >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ } else {
+ r = get_user(value, (uint32_t *)buf);
+ if (!r)
+ amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id);
+ }
+ if (r) {
+ result = r;
+ goto end;
+ }
+ offset += 4;
+ size -= 4;
+ result += 4;
+ buf += 4;
+ }
+end:
+ if (rd->id.use_grbm) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (rd->id.use_srbm) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_unlock(&adev->pm.mutex);
+
+ mutex_unlock(&rd->lock);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_debugfs_regs2_iocdata v1_data;
+ int r;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
+ r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
+ sizeof(rd->id));
+ if (r)
+ r = -EINVAL;
+ goto done;
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
+ r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
+ sizeof(v1_data));
+ if (r) {
+ r = -EINVAL;
+ goto done;
+ }
+ goto v1_copy;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+v1_copy:
+ rd->id.use_srbm = v1_data.use_srbm;
+ rd->id.use_grbm = v1_data.use_grbm;
+ rd->id.pg_lock = v1_data.pg_lock;
+ rd->id.grbm.se = v1_data.grbm.se;
+ rd->id.grbm.sh = v1_data.grbm.sh;
+ rd->id.grbm.instance = v1_data.grbm.instance;
+ rd->id.srbm.me = v1_data.srbm.me;
+ rd->id.srbm.pipe = v1_data.srbm.pipe;
+ rd->id.srbm.queue = v1_data.srbm.queue;
+ rd->id.xcc_id = 0;
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, buf, *pos, size, 0);
+}
+
+static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
+}
+
+static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t *data, x;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -ENOMEM;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
+
+ if (!rd->id.gpr_or_wave) {
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
+ } else {
+ x = size >> 2;
+ if (rd->id.gpr.vpgr_or_sgpr) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ result = -EINVAL;
+ goto done;
+ }
+
+ while (size && (*pos < x * 4)) {
+ uint32_t value;
+
+ value = data[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto done;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+done:
+ amdgpu_virt_disable_access_debugfs(adev);
+ kfree(data);
+ return result;
+}
+
+static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ int r = 0;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
+ if (copy_from_user(&rd->id,
+ (struct amdgpu_debugfs_gprwave_iocdata *)data,
+ sizeof(rd->id)))
+ r = -EFAULT;
+ goto done;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+
+
+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ value = RREG32_PCIE(*pos);
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ WREG32_PCIE(*pos, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->didt_rreg)
+ return -EOPNOTSUPP;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ value = RREG32_DIDT(*pos >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->didt_wreg)
+ return -EOPNOTSUPP;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ WREG32_DIDT(*pos >> 2, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (!adev->smc_rreg)
+ return -EOPNOTSUPP;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ value = RREG32_SMC(*pos);
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (!adev->smc_wreg)
+ return -EOPNOTSUPP;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ WREG32_SMC(*pos, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion. The format is a series of DWORDs with the first
+ * indicating which revision it is. New content is appended to the
+ * end so that older software can still read the data.
+ */
+
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+ uint32_t *config, no_regs = 0;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+
+ /* version, increment each time something is added */
+ config[no_regs++] = 5;
+ config[no_regs++] = adev->gfx.config.max_shader_engines;
+ config[no_regs++] = adev->gfx.config.max_tile_pipes;
+ config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+ config[no_regs++] = adev->gfx.config.max_sh_per_se;
+ config[no_regs++] = adev->gfx.config.max_backends_per_se;
+ config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+ config[no_regs++] = adev->gfx.config.max_gprs;
+ config[no_regs++] = adev->gfx.config.max_gs_threads;
+ config[no_regs++] = adev->gfx.config.max_hw_contexts;
+ config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+ config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+ config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+ config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+ config[no_regs++] = adev->gfx.config.num_tile_pipes;
+ config[no_regs++] = adev->gfx.config.backend_enable_mask;
+ config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+ config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+ config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+ config[no_regs++] = adev->gfx.config.num_gpus;
+ config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+ config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+ config[no_regs++] = adev->gfx.config.gb_addr_config;
+ config[no_regs++] = adev->gfx.config.num_rbs;
+
+ /* rev==1 */
+ config[no_regs++] = adev->rev_id;
+ config[no_regs++] = lower_32_bits(adev->pg_flags);
+ config[no_regs++] = lower_32_bits(adev->cg_flags);
+
+ /* rev==2 */
+ config[no_regs++] = adev->family;
+ config[no_regs++] = adev->external_rev_id;
+
+ /* rev==3 */
+ config[no_regs++] = adev->pdev->device;
+ config[no_regs++] = adev->pdev->revision;
+ config[no_regs++] = adev->pdev->subsystem_device;
+ config[no_regs++] = adev->pdev->subsystem_vendor;
+
+ /* rev==4 APU flag */
+ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
+
+ /* rev==5 PG/CG flag upper 32bit */
+ config[no_regs++] = upper_32_bits(adev->pg_flags);
+ config[no_regs++] = upper_32_bits(adev->cg_flags);
+
+ while (size && (*pos < no_regs * 4)) {
+ uint32_t value;
+
+ value = config[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ kfree(config);
+ return r;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ kfree(config);
+ return result;
+}
+
+/**
+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
+static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ int idx, x, outsize, r, valuesize;
+ uint32_t values[16];
+
+ if (size & 3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
+ /* convert offset to sensor number */
+ idx = *pos >> 2;
+
+ valuesize = sizeof(values);
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+ }
+
+ if (size > valuesize) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+
+ outsize = 0;
+ x = 0;
+ if (!r) {
+ while (size) {
+ r = put_user(values[x++], (int32_t *)buf);
+ buf += 4;
+ size -= 4;
+ outsize += 4;
+ }
+ }
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return !r ? outsize : r;
+}
+
+/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..6: Byte offset into data
+ * Bits 7..14: SE selector
+ * Bits 15..22: SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information
+ * Followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used. See gfx_v8_0_read_wave_data() for an example output.
+ */
+static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = f->f_inode->i_private;
+ int r, x;
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, data[32];
+
+ if (size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+ offset = (*pos & GENMASK_ULL(6, 0));
+ se = (*pos & GENMASK_ULL(14, 7)) >> 7;
+ sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
+ cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
+ wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
+ simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
+
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+
+ while (size && (offset < x * 4)) {
+ uint32_t value;
+
+ value = data[offset >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+ }
+
+ result += 4;
+ buf += 4;
+ offset += 4;
+ size -= 4;
+ }
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+/** amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..11: Byte offset into data
+ * Bits 12..19: SE selector
+ * Bits 20..27: SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The return data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
+ */
+static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = f->f_inode->i_private;
+ int r;
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+ if (size > 4096 || size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
+ se = (*pos & GENMASK_ULL(19, 12)) >> 12;
+ sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
+ cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
+ wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
+ simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
+ thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
+ bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0)
+ goto err;
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ goto err;
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
+
+ if (bank == 0) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ while (size) {
+ uint32_t value;
+
+ value = data[result >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ goto err;
+ }
+
+ result += 4;
+ buf += 4;
+ size -= 4;
+ }
+
+ kfree(data);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+
+err:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ kfree(data);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * Read the last residency value logged. It doesn't auto update, one needs to
+ * stop logging before getting the current value.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = amdgpu_get_gfx_off_residency(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_set_gfx_off_residency(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u64 value = 0;
+
+ r = amdgpu_get_gfx_off_entrycount(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (u64 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit zero to disable or a 32-bit non-zero to enable
+ */
+static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_gfx_off_ctrl(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_read - read gfxoff status
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = amdgpu_get_gfx_off_status(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static const struct file_operations amdgpu_debugfs_regs2_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
+ .read = amdgpu_debugfs_regs2_read,
+ .write = amdgpu_debugfs_regs2_write,
+ .open = amdgpu_debugfs_regs2_open,
+ .release = amdgpu_debugfs_regs2_release,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gprwave_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
+ .read = amdgpu_debugfs_gprwave_read,
+ .open = amdgpu_debugfs_gprwave_open,
+ .release = amdgpu_debugfs_gprwave_release,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_regs_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_read,
+ .write = amdgpu_debugfs_regs_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_didt_read,
+ .write = amdgpu_debugfs_regs_didt_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_pcie_read,
+ .write = amdgpu_debugfs_regs_pcie_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_smc_read,
+ .write = amdgpu_debugfs_regs_smc_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gca_config_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_sensors_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_sensor_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_wave_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_wave_read,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_gpr_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gpr_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_read,
+ .write = amdgpu_debugfs_gfxoff_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_count_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_residency_read,
+ .write = amdgpu_debugfs_gfxoff_residency_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations *debugfs_regs[] = {
+ &amdgpu_debugfs_regs_fops,
+ &amdgpu_debugfs_regs2_fops,
+ &amdgpu_debugfs_gprwave_fops,
+ &amdgpu_debugfs_regs_didt_fops,
+ &amdgpu_debugfs_regs_pcie_fops,
+ &amdgpu_debugfs_regs_smc_fops,
+ &amdgpu_debugfs_gca_config_fops,
+ &amdgpu_debugfs_sensors_fops,
+ &amdgpu_debugfs_wave_fops,
+ &amdgpu_debugfs_gpr_fops,
+ &amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
+ &amdgpu_debugfs_gfxoff_count_fops,
+ &amdgpu_debugfs_gfxoff_residency_fops,
+};
+
+static const char * const debugfs_regs_names[] = {
+ "amdgpu_regs",
+ "amdgpu_regs2",
+ "amdgpu_gprwave",
+ "amdgpu_regs_didt",
+ "amdgpu_regs_pcie",
+ "amdgpu_regs_smc",
+ "amdgpu_gca_config",
+ "amdgpu_sensors",
+ "amdgpu_wave",
+ "amdgpu_gpr",
+ "amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
+ "amdgpu_gfxoff_count",
+ "amdgpu_gfxoff_residency",
+};
+
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *ent, *root = minor->debugfs_root;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
+ ent = debugfs_create_file(debugfs_regs_names[i],
+ S_IFREG | 0444, root,
+ adev, debugfs_regs[i]);
+ if (!i && !IS_ERR_OR_NULL(ent))
+ i_size_write(ent->d_inode, adev->rmmio_size);
+ }
+
+ return 0;
+}
+
+static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r = 0, i;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ r = down_write_killable(&adev->reset_domain->sem);
+ if (r)
+ return r;
+
+ /* hold on the scheduler */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+ kthread_park(ring->sched.thread);
+ }
+
+ seq_puts(m, "run ib test:\n");
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ seq_printf(m, "ib ring tests failed (%d).\n", r);
+ else
+ seq_puts(m, "ib ring tests passed.\n");
+
+ /* go on the scheduler */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+ kthread_unpark(ring->sched.thread);
+ }
+
+ up_write(&adev->reset_domain->sem);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+
+static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_benchmark(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ r = amdgpu_benchmark(adev, val);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
+}
+
+static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_file *file;
+ int r;
+
+ r = mutex_lock_interruptible(&dev->filelist_mutex);
+ if (r)
+ return r;
+
+ list_for_each_entry(file, &dev->filelist, lhead) {
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ seq_printf(m, "pid:%d\tProcess:%s ----------\n",
+ vm->task_info.pid, vm->task_info.process_name);
+ r = amdgpu_bo_reserve(vm->root.bo, true);
+ if (r)
+ break;
+ amdgpu_debugfs_vm_bo_info(vm, m);
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+
+ mutex_unlock(&dev->filelist_mutex);
+
+ return r;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_test_ib);
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_vm_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark,
+ "%lld\n");
+
+static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
+ struct dma_fence **fences)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t sync_seq, last_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = ring->fence_drv.sync_seq;
+
+ last_seq &= drv->num_fences_mask;
+ sync_seq &= drv->num_fences_mask;
+
+ do {
+ struct dma_fence *fence, **ptr;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[last_seq];
+
+ fence = rcu_dereference_protected(*ptr, 1);
+ RCU_INIT_POINTER(*ptr, NULL);
+
+ if (!fence)
+ continue;
+
+ fences[last_seq] = fence;
+
+ } while (last_seq != sync_seq);
+}
+
+static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
+ int length)
+{
+ int i;
+ struct dma_fence *fence;
+
+ for (i = 0; i < length; i++) {
+ fence = fences[i];
+ if (!fence)
+ continue;
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+ }
+}
+
+static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct dma_fence *fence;
+
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry(s_job, &sched->pending_list, list) {
+ fence = sched->ops->run_job(s_job);
+ dma_fence_put(fence);
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
+{
+ struct amdgpu_job *job;
+ struct drm_sched_job *s_job, *tmp;
+ uint32_t preempt_seq;
+ struct dma_fence *fence, **ptr;
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ bool preempted = true;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+ return;
+
+ preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
+ if (preempt_seq <= atomic_read(&drv->last_seq)) {
+ preempted = false;
+ goto no_preempt;
+ }
+
+ preempt_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[preempt_seq];
+ fence = rcu_dereference_protected(*ptr, 1);
+
+no_preempt:
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
+ if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
+ /* remove job from ring_mirror_list */
+ list_del_init(&s_job->list);
+ sched->ops->free_job(s_job);
+ continue;
+ }
+ job = to_amdgpu_job(s_job);
+ if (preempted && (&job->hw_fence) == fence)
+ /* mark the job as preempted */
+ job->preemption_status |= AMDGPU_IB_PREEMPTED;
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
+{
+ int r, length;
+ struct amdgpu_ring *ring;
+ struct dma_fence **fences = NULL;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+ if (val >= AMDGPU_MAX_RINGS)
+ return -EINVAL;
+
+ ring = adev->rings[val];
+
+ if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ return -EINVAL;
+
+ /* the last preemption failed */
+ if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
+ return -EBUSY;
+
+ length = ring->fence_drv.num_fences_mask + 1;
+ fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
+ if (!fences)
+ return -ENOMEM;
+
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ r = down_read_killable(&adev->reset_domain->sem);
+ if (r)
+ goto pro_end;
+
+ /* stop the scheduler */
+ kthread_park(ring->sched.thread);
+
+ /* preempt the IB */
+ r = amdgpu_ring_preempt_ib(ring);
+ if (r) {
+ DRM_WARN("failed to preempt ring %d\n", ring->idx);
+ goto failure;
+ }
+
+ amdgpu_fence_process(ring);
+
+ if (atomic_read(&ring->fence_drv.last_seq) !=
+ ring->fence_drv.sync_seq) {
+ DRM_INFO("ring %d was preempted\n", ring->idx);
+
+ amdgpu_ib_preempt_mark_partial_job(ring);
+
+ /* swap out the old fences */
+ amdgpu_ib_preempt_fences_swap(ring, fences);
+
+ amdgpu_fence_driver_force_completion(ring);
+
+ /* resubmit unfinished jobs */
+ amdgpu_ib_preempt_job_recovery(&ring->sched);
+
+ /* wait for jobs finished */
+ amdgpu_fence_wait_empty(ring);
+
+ /* signal the old fences */
+ amdgpu_ib_preempt_signal_fences(fences, length);
+ }
+
+failure:
+ /* restart the scheduler */
+ kthread_unpark(ring->sched.thread);
+
+ up_read(&adev->reset_domain->sem);
+
+pro_end:
+ kfree(fences);
+
+ return r;
+}
+
+static int amdgpu_debugfs_sclk_set(void *data, u64 val)
+{
+ int ret = 0;
+ uint32_t max_freq, min_freq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return ret;
+ }
+
+ ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK, &min_freq, &max_freq);
+ if (ret == -EOPNOTSUPP) {
+ ret = 0;
+ goto out;
+ }
+ if (ret || val > max_freq || val < min_freq) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK, (uint32_t)val, (uint32_t)val);
+ if (ret)
+ ret = -EINVAL;
+
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
+ amdgpu_debugfs_ib_preempt, "%llu\n");
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
+ amdgpu_debugfs_sclk_set, "%llu\n");
+
+static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
+ char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ char reg_offset[12];
+ int i, ret, len = 0;
+
+ if (*pos)
+ return 0;
+
+ memset(reg_offset, 0, 12);
+ ret = down_read_killable(&adev->reset_domain->sem);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < adev->num_regs; i++) {
+ sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]);
+ up_read(&adev->reset_domain->sem);
+ if (copy_to_user(buf + len, reg_offset, strlen(reg_offset)))
+ return -EFAULT;
+
+ len += strlen(reg_offset);
+ ret = down_read_killable(&adev->reset_domain->sem);
+ if (ret)
+ return ret;
+ }
+
+ up_read(&adev->reset_domain->sem);
+ *pos += len;
+
+ return len;
+}
+
+static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
+ const char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ char reg_offset[11];
+ uint32_t *new = NULL, *tmp = NULL;
+ int ret, i = 0, len = 0;
+
+ do {
+ memset(reg_offset, 0, 11);
+ if (copy_from_user(reg_offset, buf + len,
+ min(10, ((int)size-len)))) {
+ ret = -EFAULT;
+ goto error_free;
+ }
+
+ new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
+ if (!new) {
+ ret = -ENOMEM;
+ goto error_free;
+ }
+ tmp = new;
+ if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) {
+ ret = -EINVAL;
+ goto error_free;
+ }
+
+ len += ret;
+ i++;
+ } while (len < size);
+
+ new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL);
+ if (!new) {
+ ret = -ENOMEM;
+ goto error_free;
+ }
+ ret = down_write_killable(&adev->reset_domain->sem);
+ if (ret)
+ goto error_free;
+
+ swap(adev->reset_dump_reg_list, tmp);
+ swap(adev->reset_dump_reg_value, new);
+ adev->num_regs = i;
+ up_write(&adev->reset_domain->sem);
+ ret = size;
+
+error_free:
+ if (tmp != new)
+ kfree(tmp);
+ kfree(new);
+ return ret;
+}
+
+static const struct file_operations amdgpu_reset_dump_register_list = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_reset_dump_register_list_read,
+ .write = amdgpu_reset_dump_register_list_write,
+ .llseek = default_llseek
+};
+
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+ struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+ struct dentry *ent;
+ int r, i;
+
+ if (!debugfs_initialized())
+ return 0;
+
+ debugfs_create_x32("amdgpu_smu_debug", 0600, root,
+ &adev->pm.smu_debug_mask);
+
+ ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
+ &fops_ib_preempt);
+ if (IS_ERR(ent)) {
+ DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
+ return PTR_ERR(ent);
+ }
+
+ ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev,
+ &fops_sclk_set);
+ if (IS_ERR(ent)) {
+ DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n");
+ return PTR_ERR(ent);
+ }
+
+ /* Register debugfs entries for amdgpu_ttm */
+ amdgpu_ttm_debugfs_init(adev);
+ amdgpu_debugfs_pm_init(adev);
+ amdgpu_debugfs_sa_init(adev);
+ amdgpu_debugfs_fence_init(adev);
+ amdgpu_debugfs_gem_init(adev);
+
+ r = amdgpu_debugfs_regs_init(adev);
+ if (r)
+ DRM_ERROR("registering register debugfs failed (%d).\n", r);
+
+ amdgpu_debugfs_firmware_init(adev);
+ amdgpu_ta_if_debugfs_init(adev);
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (adev->dc_enabled)
+ dtn_debugfs_init(adev);
+#endif
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring)
+ continue;
+
+ amdgpu_debugfs_ring_init(adev, ring);
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (!amdgpu_vcnfw_log)
+ break;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
+ }
+
+ amdgpu_ras_debugfs_create_all(adev);
+ amdgpu_rap_debugfs_init(adev);
+ amdgpu_securedisplay_debugfs_init(adev);
+ amdgpu_fw_attestation_debugfs_init(adev);
+
+ debugfs_create_file("amdgpu_evict_vram", 0444, root, adev,
+ &amdgpu_evict_vram_fops);
+ debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev,
+ &amdgpu_evict_gtt_fops);
+ debugfs_create_file("amdgpu_test_ib", 0444, root, adev,
+ &amdgpu_debugfs_test_ib_fops);
+ debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
+ &amdgpu_debugfs_vm_info_fops);
+ debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
+ &amdgpu_benchmark_fops);
+ debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev,
+ &amdgpu_reset_dump_register_list);
+
+ adev->debugfs_vbios_blob.data = adev->bios;
+ adev->debugfs_vbios_blob.size = adev->bios_size;
+ debugfs_create_blob("amdgpu_vbios", 0444, root,
+ &adev->debugfs_vbios_blob);
+
+ adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
+ adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->debugfs_discovery_blob);
+
+ return 0;
+}
+
+#else
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
new file mode 100644
index 000000000..371a6f0de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+/*
+ * Debugfs
+ */
+
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
new file mode 100644
index 000000000..56d99ffbb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -0,0 +1,6052 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#include <linux/power_supply.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
+
+#include <drm/drm_aperture.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
+#include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
+#include <linux/efi.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
+#include "amd_pcie.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "si.h"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#include "cik.h"
+#endif
+#include "vi.h"
+#include "soc15.h"
+#include "nv.h"
+#include "bif/bif_4_1_d.h"
+#include <linux/firmware.h>
+#include "amdgpu_vf_error.h"
+
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_pm.h"
+
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_pmu.h"
+#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_reset.h"
+
+#include <linux/suspend.h>
+#include <drm/task_barrier.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
+MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
+
+#define AMDGPU_RESUME_MS 2000
+#define AMDGPU_MAX_RETRY_LIMIT 2
+#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+
+static const struct drm_driver amdgpu_kms_driver;
+
+const char *amdgpu_asic_name[] = {
+ "TAHITI",
+ "PITCAIRN",
+ "VERDE",
+ "OLAND",
+ "HAINAN",
+ "BONAIRE",
+ "KAVERI",
+ "KABINI",
+ "HAWAII",
+ "MULLINS",
+ "TOPAZ",
+ "TONGA",
+ "FIJI",
+ "CARRIZO",
+ "STONEY",
+ "POLARIS10",
+ "POLARIS11",
+ "POLARIS12",
+ "VEGAM",
+ "VEGA10",
+ "VEGA12",
+ "VEGA20",
+ "RAVEN",
+ "ARCTURUS",
+ "RENOIR",
+ "ALDEBARAN",
+ "NAVI10",
+ "CYAN_SKILLFISH",
+ "NAVI14",
+ "NAVI12",
+ "SIENNA_CICHLID",
+ "NAVY_FLOUNDER",
+ "VANGOGH",
+ "DIMGREY_CAVEFISH",
+ "BEIGE_GOBY",
+ "YELLOW_CARP",
+ "IP DISCOVERY",
+ "LAST",
+};
+
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs)
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+ return sysfs_emit(buf, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, 0444,
+ amdgpu_device_get_pcie_replay_count, NULL);
+
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
+
+/**
+ * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with ATPX power control,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_px(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
+ return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with ACPI power control,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_boco(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (adev->has_pr3 ||
+ ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
+ return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ return amdgpu_asic_supports_baco(adev);
+}
+
+/**
+ * amdgpu_device_supports_smart_shift - Is the device dGPU with
+ * smart shift support
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with Smart Shift support,
+ * otherwise returns false.
+ */
+bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
+{
+ return (amdgpu_device_supports_boco(dev) &&
+ amdgpu_acpi_is_power_shift_control_supported());
+}
+
+/*
+ * VRAM access helper functions
+ */
+
+/**
+ * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ unsigned long flags;
+ uint32_t hi = ~0, tmp = 0;
+ uint32_t *data = buf;
+ uint64_t last;
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
+
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ for (last = pos + size; pos < last; pos += 4) {
+ tmp = pos >> 31;
+
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ if (tmp != hi) {
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ hi = tmp;
+ }
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *data++);
+ else
+ *data++ = RREG32_NO_KIQ(mmMM_DATA);
+ }
+
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_device_aper_access - access vram by vram aperature
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ *
+ * The return value means how many bytes have been transferred.
+ */
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+#ifdef CONFIG_64BIT
+ void __iomem *addr;
+ size_t count = 0;
+ uint64_t last;
+
+ if (!adev->mman.aper_base_kaddr)
+ return 0;
+
+ last = min(pos + size, adev->gmc.visible_vram_size);
+ if (last > pos) {
+ addr = adev->mman.aper_base_kaddr + pos;
+ count = last - pos;
+
+ if (write) {
+ memcpy_toio(addr, buf, count);
+ /* Make sure HDP write cache flush happens without any reordering
+ * after the system memory contents are sent over PCIe device
+ */
+ mb();
+ amdgpu_device_flush_hdp(adev, NULL);
+ } else {
+ amdgpu_device_invalidate_hdp(adev, NULL);
+ /* Make sure HDP read cache is invalidated before issuing a read
+ * to the PCIe device
+ */
+ mb();
+ memcpy_fromio(buf, addr, count);
+ }
+
+ }
+
+ return count;
+#else
+ return 0;
+#endif
+}
+
+/**
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ size_t count;
+
+ /* try to using vram apreature to access vram first */
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram */
+ pos += count;
+ buf += count;
+ amdgpu_device_mm_access(adev, pos, buf, size, write);
+ }
+}
+
+/*
+ * register access helper functions.
+ */
+
+/* Check if hw access should be skipped because of hotplug or device error */
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
+{
+ if (adev->no_hw_access)
+ return true;
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * This is a bit complicated to understand, so worth a comment. What we assert
+ * here is that the GPU reset is not running on another thread in parallel.
+ *
+ * For this we trylock the read side of the reset semaphore, if that succeeds
+ * we know that the reset is not running in paralell.
+ *
+ * If the trylock fails we assert that we are either already holding the read
+ * side of the lock or are the reset thread itself and hold the write side of
+ * the lock.
+ */
+ if (in_task()) {
+ if (down_read_trylock(&adev->reset_domain->sem))
+ up_read(&adev->reset_domain->sem);
+ else
+ lockdep_assert_held(&adev->reset_domain->sem);
+ }
+#endif
+ return false;
+}
+
+/**
+ * amdgpu_device_rreg - read a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags)
+{
+ uint32_t ret;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
+
+ return ret;
+}
+
+/*
+ * MMIO register read with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (offset < adev->rmmio_size)
+ return (readb(adev->rmmio + offset));
+ BUG();
+}
+
+/*
+ * MMIO register write with bytes helper functions
+ * @offset:bytes offset from MMIO start
+ * @value: the value want to be written to the register
+ */
+
+/**
+ * amdgpu_mm_wreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (offset < adev->rmmio_size)
+ writeb(value, adev->rmmio + offset);
+ else
+ BUG();
+}
+
+/**
+ * amdgpu_device_wreg - write to a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ adev->pcie_wreg(adev, reg * 4, v);
+ }
+
+ trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+}
+
+/**
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: mmio/rlc register
+ * @v: value to write
+ *
+ * this function is invoked only for the debugfs register access
+ */
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t xcc_id)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (amdgpu_sriov_fullaccess(adev) &&
+ adev->gfx.rlc.funcs &&
+ adev->gfx.rlc.funcs->is_rlcg_access_range) {
+ if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
+ return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ adev->pcie_wreg(adev, reg * 4, v);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+}
+
+/**
+ * amdgpu_device_indirect_rreg - read an indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u32 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ u32 r;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
+ u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ r |= ((u64)readl(pcie_data_offset) << 32);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_wreg - write an indirect register address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
+ u32 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
+ u32 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return device rev_id
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_rev_id(adev);
+}
+
+/**
+ * amdgpu_invalid_rreg - dummy reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
+ reg, v);
+ BUG();
+}
+
+static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
+{
+ DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
+ reg, v);
+ BUG();
+}
+
+/**
+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+ DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg64 - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+/**
+ * amdgpu_block_invalid_rreg - dummy reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @block: offset of instance
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
+ uint32_t block, uint32_t reg)
+{
+ DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
+ reg, block);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_block_invalid_wreg - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @block: offset of instance
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
+ uint32_t block,
+ uint32_t reg, uint32_t v)
+{
+ DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
+ reg, block, v);
+ BUG();
+}
+
+/**
+ * amdgpu_device_asic_init - Wrapper for atom asic_init
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Does any asic specific work and then calls atom asic init.
+ */
+static int amdgpu_device_asic_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ amdgpu_asic_pre_asic_init(adev);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
+ adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ amdgpu_psp_wait_for_bootloader(adev);
+ ret = amdgpu_atomfirmware_asic_init(adev, true);
+ return ret;
+ } else {
+ return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocates a scratch page of VRAM for use by various things in the
+ * driver.
+ */
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
+{
+ return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mem_scratch.robj,
+ &adev->mem_scratch.gpu_addr,
+ (void **)&adev->mem_scratch.ptr);
+}
+
+/**
+ * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the VRAM scratch page.
+ */
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
+}
+
+/**
+ * amdgpu_device_program_register_sequence - program an array of registers.
+ *
+ * @adev: amdgpu_device pointer
+ * @registers: pointer to the register array
+ * @array_size: size of the register array
+ *
+ * Programs an array or registers with and or masks.
+ * This is a helper for setting golden registers.
+ */
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
+ const u32 *registers,
+ const u32 array_size)
+{
+ u32 tmp, reg, and_mask, or_mask;
+ int i;
+
+ if (array_size % 3)
+ return;
+
+ for (i = 0; i < array_size; i += 3) {
+ reg = registers[i + 0];
+ and_mask = registers[i + 1];
+ or_mask = registers[i + 2];
+
+ if (and_mask == 0xffffffff) {
+ tmp = or_mask;
+ } else {
+ tmp = RREG32(reg);
+ tmp &= ~and_mask;
+ if (adev->family >= AMDGPU_FAMILY_AI)
+ tmp |= (or_mask & and_mask);
+ else
+ tmp |= or_mask;
+ }
+ WREG32(reg, tmp);
+ }
+}
+
+/**
+ * amdgpu_device_pci_config_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using the pci config reset sequence.
+ * Only applicable to asics prior to vega10.
+ */
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
+{
+ pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
+}
+
+/**
+ * amdgpu_device_pci_reset - reset the GPU using generic PCI means
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
+ */
+int amdgpu_device_pci_reset(struct amdgpu_device *adev)
+{
+ return pci_reset_function(adev->pdev);
+}
+
+/*
+ * amdgpu_device_wb_*()
+ * Writeback is the method by which the GPU updates special pages in memory
+ * with the status of certain GPU events (fences, ring pointers,etc.).
+ */
+
+/**
+ * amdgpu_device_wb_fini - Disable Writeback and free memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disables Writeback and frees the Writeback memory (all asics).
+ * Used at driver shutdown.
+ */
+static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
+{
+ if (adev->wb.wb_obj) {
+ amdgpu_bo_free_kernel(&adev->wb.wb_obj,
+ &adev->wb.gpu_addr,
+ (void **)&adev->wb.wb);
+ adev->wb.wb_obj = NULL;
+ }
+}
+
+/**
+ * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes writeback and allocates writeback memory (all asics).
+ * Used at driver startup.
+ * Returns 0 on success or an -error on failure.
+ */
+static int amdgpu_device_wb_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->wb.wb_obj == NULL) {
+ /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->wb.wb_obj, &adev->wb.gpu_addr,
+ (void **)&adev->wb.wb);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
+ return r;
+ }
+
+ adev->wb.num_wb = AMDGPU_MAX_WB;
+ memset(&adev->wb.used, 0, sizeof(adev->wb.used));
+
+ /* clear wb memory */
+ memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_wb_get - Allocate a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Allocate a wb slot for use by the driver (all asics).
+ * Returns 0 on success or -EINVAL on failure.
+ */
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
+{
+ unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+
+ if (offset < adev->wb.num_wb) {
+ __set_bit(offset, adev->wb.used);
+ *wb = offset << 3; /* convert to dw offset */
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+/**
+ * amdgpu_device_wb_free - Free a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Free a wb slot allocated for use by the driver (all asics)
+ */
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
+{
+ wb >>= 3;
+ if (wb < adev->wb.num_wb)
+ __clear_bit(wb, adev->wb.used);
+}
+
+/**
+ * amdgpu_device_resize_fb_bar - try to resize FB BAR
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
+ * to fail, but if any of the BARs is not accessible after the size we abort
+ * driver loading by returning -ENODEV.
+ */
+int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
+{
+ int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
+ struct pci_bus *root;
+ struct resource *res;
+ unsigned int i;
+ u16 cmd;
+ int r;
+
+ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return 0;
+
+ /* Bypass for VF */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* skip if the bios has already enabled large BAR */
+ if (adev->gmc.real_vram_size &&
+ (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
+ return 0;
+
+ /* Check if the root BUS has 64bit memory resources */
+ root = adev->pdev->bus;
+ while (root->parent)
+ root = root->parent;
+
+ pci_bus_for_each_resource(root, res, i) {
+ if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+ res->start > 0x100000000ull)
+ break;
+ }
+
+ /* Trying to resize is pointless without a root hub window above 4GB */
+ if (!res)
+ return 0;
+
+ /* Limit the BAR size to what is available */
+ rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
+ rbar_size);
+
+ /* Disable memory decoding while we change the BAR addresses and size */
+ pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
+ pci_write_config_word(adev->pdev, PCI_COMMAND,
+ cmd & ~PCI_COMMAND_MEMORY);
+
+ /* Free the VRAM and doorbell BAR, we most likely need to move both. */
+ amdgpu_doorbell_fini(adev);
+ if (adev->asic_type >= CHIP_BONAIRE)
+ pci_release_resource(adev->pdev, 2);
+
+ pci_release_resource(adev->pdev, 0);
+
+ r = pci_resize_resource(adev->pdev, 0, rbar_size);
+ if (r == -ENOSPC)
+ DRM_INFO("Not enough PCI address space for a large BAR.");
+ else if (r && r != -ENOTSUPP)
+ DRM_ERROR("Problem resizing BAR0 (%d).", r);
+
+ pci_assign_unassigned_bus_resources(adev->pdev->bus);
+
+ /* When the doorbell or fb BAR isn't available we have no chance of
+ * using the device.
+ */
+ r = amdgpu_doorbell_init(adev);
+ if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
+ return -ENODEV;
+
+ pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
+
+ return 0;
+}
+
+static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
+ return false;
+
+ return true;
+}
+
+/*
+ * GPU helpers function.
+ */
+/**
+ * amdgpu_device_need_post - check if the hw need post or not
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check if the asic has been initialized (all asics) at driver startup
+ * or post is needed if hw reset is performed.
+ * Returns true if need or false if not.
+ */
+bool amdgpu_device_need_post(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ if (!amdgpu_device_read_bios(adev))
+ return false;
+
+ if (amdgpu_passthrough(adev)) {
+ /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
+ * some old smc fw still need driver do vPost otherwise gpu hang, while
+ * those smc fw version above 22.15 doesn't have this flaw, so we force
+ * vpost executed for smc version below 22.15
+ */
+ if (adev->asic_type == CHIP_FIJI) {
+ int err;
+ uint32_t fw_ver;
+
+ err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
+ /* force vPost if error occured */
+ if (err)
+ return true;
+
+ fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ if (fw_ver < 0x00160e00)
+ return true;
+ }
+ }
+
+ /* Don't post if we need to reset whole hive on init */
+ if (adev->gmc.xgmi.pending_reset)
+ return false;
+
+ if (adev->has_hw_reset) {
+ adev->has_hw_reset = false;
+ return true;
+ }
+
+ /* bios scratch used on CIK+ */
+ if (adev->asic_type >= CHIP_BONAIRE)
+ return amdgpu_atombios_scratch_need_asic_init(adev);
+
+ /* check MEM_SIZE for older asics */
+ reg = amdgpu_asic_get_config_memsize(adev);
+
+ if ((reg != 0) && (reg != 0xffffffff))
+ return false;
+
+ return true;
+}
+
+/*
+ * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
+ * speed switching. Until we have confirmation from Intel that a specific host
+ * supports it, it's safer that we keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+bool amdgpu_device_pcie_dynamic_switching_supported(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ return false;
+#endif
+ return true;
+}
+
+/**
+ * amdgpu_device_should_use_aspm - check if the device should program ASPM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Confirm whether the module parameter and pcie bridge agree that ASPM should
+ * be set for this device.
+ *
+ * Returns true if it should be used or false if not.
+ */
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
+{
+ switch (amdgpu_aspm) {
+ case -1:
+ break;
+ case 0:
+ return false;
+ case 1:
+ return true;
+ default:
+ return false;
+ }
+ return pcie_aspm_enabled(adev->pdev);
+}
+
+bool amdgpu_device_aspm_support_quirk(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+ return true;
+#endif
+}
+
+/* if we get transitioned to only one device, take VGA back */
+/**
+ * amdgpu_device_vga_set_decode - enable/disable vga decode
+ *
+ * @pdev: PCI device pointer
+ * @state: enable/disable vga decode
+ *
+ * Enable/disable vga decode (all asics).
+ * Returns VGA resource flags.
+ */
+static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
+ bool state)
+{
+ struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
+
+ amdgpu_asic_set_vga_state(adev, state);
+ if (state)
+ return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
+ VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+ else
+ return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+}
+
+/**
+ * amdgpu_device_check_block_size - validate the vm block size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm block size specified via module parameter.
+ * The vm block size defines number of bits in page table versus page directory,
+ * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+ * page table and the remaining bits are in the page directory.
+ */
+static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
+{
+ /* defines number of bits in page table versus page directory,
+ * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+ * page table and the remaining bits are in the page directory
+ */
+ if (amdgpu_vm_block_size == -1)
+ return;
+
+ if (amdgpu_vm_block_size < 9) {
+ dev_warn(adev->dev, "VM page table size (%d) too small\n",
+ amdgpu_vm_block_size);
+ amdgpu_vm_block_size = -1;
+ }
+}
+
+/**
+ * amdgpu_device_check_vm_size - validate the vm size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm size in GB specified via module parameter.
+ * The VM size is the size of the GPU virtual memory space in GB.
+ */
+static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
+{
+ /* no need to check the default value */
+ if (amdgpu_vm_size == -1)
+ return;
+
+ if (amdgpu_vm_size < 1) {
+ dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
+ amdgpu_vm_size);
+ amdgpu_vm_size = -1;
+ }
+}
+
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+ struct sysinfo si;
+ bool is_os_64 = (sizeof(void *) == 8);
+ uint64_t total_memory;
+ uint64_t dram_size_seven_GB = 0x1B8000000;
+ uint64_t dram_size_three_GB = 0xB8000000;
+
+ if (amdgpu_smu_memory_pool_size == 0)
+ return;
+
+ if (!is_os_64) {
+ DRM_WARN("Not 64-bit OS, feature not supported\n");
+ goto def_value;
+ }
+ si_meminfo(&si);
+ total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+ if ((amdgpu_smu_memory_pool_size == 1) ||
+ (amdgpu_smu_memory_pool_size == 2)) {
+ if (total_memory < dram_size_three_GB)
+ goto def_value1;
+ } else if ((amdgpu_smu_memory_pool_size == 4) ||
+ (amdgpu_smu_memory_pool_size == 8)) {
+ if (total_memory < dram_size_seven_GB)
+ goto def_value1;
+ } else {
+ DRM_WARN("Smu memory pool size not supported\n");
+ goto def_value;
+ }
+ adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+ return;
+
+def_value1:
+ DRM_WARN("No enough system memory\n");
+def_value:
+ adev->pm.smu_prv_buffer_size = 0;
+}
+
+static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_RAVEN)
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ if (adev->pdev->device == 0x15dd)
+ adev->apu_flags |= AMD_APU_IS_RAVEN;
+ if (adev->pdev->device == 0x15d8)
+ adev->apu_flags |= AMD_APU_IS_PICASSO;
+ break;
+ case CHIP_RENOIR:
+ if ((adev->pdev->device == 0x1636) ||
+ (adev->pdev->device == 0x164c))
+ adev->apu_flags |= AMD_APU_IS_RENOIR;
+ else
+ adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
+ break;
+ case CHIP_VANGOGH:
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case CHIP_YELLOW_CARP:
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if ((adev->pdev->device == 0x13FE) ||
+ (adev->pdev->device == 0x143F))
+ adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_check_arguments - validate module params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates certain module parameters and updates
+ * the associated values used by the driver (all asics).
+ */
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
+{
+ if (amdgpu_sched_jobs < 4) {
+ dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
+ amdgpu_sched_jobs);
+ amdgpu_sched_jobs = 4;
+ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
+ dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
+ amdgpu_sched_jobs);
+ amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
+ }
+
+ if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
+ /* gart size must be greater or equal to 32M */
+ dev_warn(adev->dev, "gart size (%d) too small\n",
+ amdgpu_gart_size);
+ amdgpu_gart_size = -1;
+ }
+
+ if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
+ /* gtt size must be greater or equal to 32M */
+ dev_warn(adev->dev, "gtt size (%d) too small\n",
+ amdgpu_gtt_size);
+ amdgpu_gtt_size = -1;
+ }
+
+ /* valid range is between 4 and 9 inclusive */
+ if (amdgpu_vm_fragment_size != -1 &&
+ (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
+ dev_warn(adev->dev, "valid range is between 4 and 9\n");
+ amdgpu_vm_fragment_size = -1;
+ }
+
+ if (amdgpu_sched_hw_submission < 2) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = 2;
+ } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
+ }
+
+ if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
+ dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
+ amdgpu_reset_method = -1;
+ }
+
+ amdgpu_device_check_smu_prv_buffer_size(adev);
+
+ amdgpu_device_check_vm_size(adev);
+
+ amdgpu_device_check_block_size(adev);
+
+ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+ return 0;
+}
+
+/**
+ * amdgpu_switcheroo_set_state - set switcheroo state
+ *
+ * @pdev: pci dev pointer
+ * @state: vga_switcheroo state
+ *
+ * Callback for the switcheroo driver. Suspends or resumes
+ * the asics before or after it is powered up using ACPI methods.
+ */
+static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
+ enum vga_switcheroo_state state)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ int r;
+
+ if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
+ return;
+
+ if (state == VGA_SWITCHEROO_ON) {
+ pr_info("switched on\n");
+ /* don't suspend or resume card normally */
+ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ r = pci_enable_device(pdev);
+ if (r)
+ DRM_WARN("pci_enable_device failed (%d)\n", r);
+ amdgpu_device_resume(dev, true);
+
+ dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ } else {
+ pr_info("switched off\n");
+ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_suspend(dev, true);
+ amdgpu_device_cache_pci_state(pdev);
+ /* Shut down the device */
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ dev->switch_power_state = DRM_SWITCH_POWER_OFF;
+ }
+}
+
+/**
+ * amdgpu_switcheroo_can_switch - see if switcheroo state can change
+ *
+ * @pdev: pci dev pointer
+ *
+ * Callback for the switcheroo driver. Check of the switcheroo
+ * state can be changed.
+ * Returns true if the state can be changed, false if not.
+ */
+static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+
+ /*
+ * FIXME: open_count is protected by drm_global_mutex but that would lead to
+ * locking inversion with the driver load path. And the access here is
+ * completely racy anyway. So don't bother with locking for now.
+ */
+ return atomic_read(&dev->open_count) == 0;
+}
+
+static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
+ .set_gpu_state = amdgpu_switcheroo_set_state,
+ .reprobe = NULL,
+ .can_switch = amdgpu_switcheroo_can_switch,
+};
+
+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @dev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_clockgating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = dev;
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
+ (void *)adev, state);
+ if (r)
+ DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @dev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_powergating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = dev;
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(
+ (void *)adev, state);
+ if (r)
+ DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
+ adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
+ }
+}
+
+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the request hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type == block_type) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
+ if (r)
+ return r;
+ break;
+ }
+ }
+ return 0;
+
+}
+
+/**
+ * amdgpu_device_ip_is_idle - is the hardware IP idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is idle or not.
+ * Returns true if it the IP is idle, false if not.
+ */
+bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
+ }
+ return true;
+
+}
+
+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type)
+ return &adev->ip_blocks[i];
+
+ return NULL;
+}
+
+/**
+ * amdgpu_device_ip_block_version_cmp
+ *
+ * @adev: amdgpu_device pointer
+ * @type: enum amd_ip_block_type
+ * @major: major version
+ * @minor: minor version
+ *
+ * return 0 if equal or greater
+ * return 1 if smaller or the ip_block doesn't exist
+ */
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type,
+ u32 major, u32 minor)
+{
+ struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
+
+ if (ip_block && ((ip_block->version->major > major) ||
+ ((ip_block->version->major == major) &&
+ (ip_block->version->minor >= minor))))
+ return 0;
+
+ return 1;
+}
+
+/**
+ * amdgpu_device_ip_block_add
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_block_version: pointer to the IP to add
+ *
+ * Adds the IP block driver information to the collection of IPs
+ * on the asic.
+ */
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version)
+{
+ if (!ip_block_version)
+ return -EINVAL;
+
+ switch (ip_block_version->type) {
+ case AMD_IP_BLOCK_TYPE_VCN:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ return 0;
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
+ ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_enable_virtual_display - enable virtual display feature
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enabled the virtual display feature if the user has enabled it via
+ * the module parameter virtual_display. This feature provides a virtual
+ * display hardware on headless boards or in virtualized environments.
+ * This function parses and validates the configuration string specified by
+ * the user and configues the virtual display configuration (number of
+ * virtual connectors, crtcs, etc.) specified.
+ */
+static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
+{
+ adev->enable_virtual_display = false;
+
+ if (amdgpu_virtual_display) {
+ const char *pci_address_name = pci_name(adev->pdev);
+ char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
+
+ pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
+ pciaddstr_tmp = pciaddstr;
+ while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
+ pciaddname = strsep(&pciaddname_tmp, ",");
+ if (!strcmp("all", pciaddname)
+ || !strcmp(pci_address_name, pciaddname)) {
+ long num_crtc;
+ int res = -1;
+
+ adev->enable_virtual_display = true;
+
+ if (pciaddname_tmp)
+ res = kstrtol(pciaddname_tmp, 10,
+ &num_crtc);
+
+ if (!res) {
+ if (num_crtc < 1)
+ num_crtc = 1;
+ if (num_crtc > 6)
+ num_crtc = 6;
+ adev->mode_info.num_crtc = num_crtc;
+ } else {
+ adev->mode_info.num_crtc = 1;
+ }
+ break;
+ }
+ }
+
+ DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+ amdgpu_virtual_display, pci_address_name,
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
+
+ kfree(pciaddstr);
+ }
+}
+
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ DRM_INFO("virtual_display:%d, num_crtc:%d\n",
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
+ }
+}
+
+/**
+ * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Parses the asic configuration parameters specified in the gpu info
+ * firmware and makes them availale to the driver for use in configuring
+ * the asic.
+ * Returns 0 on success, -EINVAL on failure.
+ */
+static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[40];
+ int err;
+ const struct gpu_info_firmware_header_v1_0 *hdr;
+
+ adev->firmware.gpu_info_fw = NULL;
+
+ if (adev->mman.discovery_bin)
+ return 0;
+
+ switch (adev->asic_type) {
+ default:
+ return 0;
+ case CHIP_VEGA10:
+ chip_name = "vega10";
+ break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ chip_name = "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
+ break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_NAVI12:
+ chip_name = "navi12";
+ break;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
+ if (err) {
+ dev_err(adev->dev,
+ "Failed to get gpu_info firmware \"%s\"\n",
+ fw_name);
+ goto out;
+ }
+
+ hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
+ amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
+
+ switch (hdr->version_major) {
+ case 1:
+ {
+ const struct gpu_info_firmware_v1_0 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ /*
+ * Should be droped when DAL no longer needs it.
+ */
+ if (adev->asic_type == CHIP_NAVI12)
+ goto parse_soc_bounding_box;
+
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches =
+ le32_to_cpu(gpu_info_fw->gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd =
+ le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu =
+ le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
+ if (hdr->version_minor >= 1) {
+ const struct gpu_info_firmware_v1_1 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->gfx.config.num_sc_per_sh =
+ le32_to_cpu(gpu_info_fw->num_sc_per_sh);
+ adev->gfx.config.num_packer_per_sc =
+ le32_to_cpu(gpu_info_fw->num_packer_per_sc);
+ }
+
+parse_soc_bounding_box:
+ /*
+ * soc bounding box info is not integrated in disocovery table,
+ * we always need to parse it from gpu info firmware if needed.
+ */
+ if (hdr->version_minor == 2) {
+ const struct gpu_info_firmware_v1_2 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
+ }
+ break;
+ }
+ default:
+ dev_err(adev->dev,
+ "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
+ err = -EINVAL;
+ goto out;
+ }
+out:
+ return err;
+}
+
+/**
+ * amdgpu_device_ip_early_init - run early init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Early initialization pass for hardware IPs. The hardware IPs that make
+ * up each asic are discovered each IP's early_init callback is run. This
+ * is the first stage in initializing the asic.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
+{
+ struct pci_dev *parent;
+ int i, r;
+ bool total;
+
+ amdgpu_device_enable_virtual_display(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ adev->family = AMDGPU_FAMILY_SI;
+ r = si_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ if (adev->flags & AMD_IS_APU)
+ adev->family = AMDGPU_FAMILY_KV;
+ else
+ adev->family = AMDGPU_FAMILY_CI;
+
+ r = cik_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (adev->flags & AMD_IS_APU)
+ adev->family = AMDGPU_FAMILY_CZ;
+ else
+ adev->family = AMDGPU_FAMILY_VI;
+
+ r = vi_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+ default:
+ r = amdgpu_discovery_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+ }
+
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ ((adev->flags & AMD_IS_APU) == 0) &&
+ !dev_is_removable(&adev->pdev->dev))
+ adev->flags |= AMD_IS_PX;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ parent = pcie_find_root_port(adev->pdev);
+ adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+ }
+
+
+ adev->pm.pp_feature = amdgpu_pp_feature_mask;
+ if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+ adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported())
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+
+ total = true;
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
+ DRM_WARN("disabled ip block: %d <%s>\n",
+ i, adev->ip_blocks[i].version->funcs->name);
+ adev->ip_blocks[i].status.valid = false;
+ } else {
+ if (adev->ip_blocks[i].version->funcs->early_init) {
+ r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ DRM_ERROR("early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ total = false;
+ } else {
+ adev->ip_blocks[i].status.valid = true;
+ }
+ } else {
+ adev->ip_blocks[i].status.valid = true;
+ }
+ }
+ /* get the vbios after the asic_funcs are set up */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ r = amdgpu_device_parse_gpu_info_fw(adev);
+ if (r)
+ return r;
+
+ /* Read BIOS */
+ if (amdgpu_device_read_bios(adev)) {
+ if (!amdgpu_get_bios(adev))
+ return -EINVAL;
+
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ return r;
+ }
+ }
+
+ /*get pf2vf msg info at it's earliest time*/
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
+ }
+ }
+ if (!total)
+ return -ENODEV;
+
+ amdgpu_amdkfd_device_probe(adev);
+ adev->cg_flags &= amdgpu_cg_mask;
+ adev->pg_flags &= amdgpu_pg_mask;
+
+ return 0;
+}
+
+static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
+{
+ int r = 0;
+ int i;
+ uint32_t smu_version;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+
+ /* no need to do the fw loading again if already done*/
+ if (adev->ip_blocks[i].status.hw == true)
+ break;
+
+ if (amdgpu_in_reset(adev) || adev->in_suspend) {
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ } else {
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+
+ adev->ip_blocks[i].status.hw = true;
+ break;
+ }
+ }
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+ r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+
+ return r;
+}
+
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ ring->num_hw_submission, 0,
+ timeout, adev->reset_domain->wq,
+ ring->sched_score, ring->name,
+ adev->dev);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ }
+
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+
+/**
+ * amdgpu_device_ip_init - run init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the sw_init and hw_init callbacks
+ * are run. sw_init initializes the software state associated with each IP
+ * and hw_init initializes the hardware associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ r = amdgpu_ras_init(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
+ if (r) {
+ DRM_ERROR("sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.sw = true;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ if (r) {
+ DRM_ERROR("hw_init %d failed %d\n", i, r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
+ /* Try to reserve bad pages early */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_exchange_data(adev);
+
+ r = amdgpu_device_mem_scratch_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
+ goto init_failed;
+ }
+ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ if (r) {
+ DRM_ERROR("hw_init %d failed %d\n", i, r);
+ goto init_failed;
+ }
+ r = amdgpu_device_wb_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+
+ /* right after GMC hw init, we create CSA */
+ if (adev->gfx.mcbp) {
+ r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_CSA_SIZE);
+ if (r) {
+ DRM_ERROR("allocate CSA failed %d\n", r);
+ goto init_failed;
+ }
+ }
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
+ r = amdgpu_ib_pool_init(adev);
+ if (r) {
+ dev_err(adev->dev, "IB initialization failed (%d).\n", r);
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+ goto init_failed;
+ }
+
+ r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_ip_hw_init_phase1(adev);
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_ip_hw_init_phase2(adev);
+ if (r)
+ goto init_failed;
+
+ /*
+ * retired pages will be loaded from eeprom and reserved here,
+ * it should be called after amdgpu_device_ip_hw_init_phase2 since
+ * for some ASICs the RAS EEPROM code relies on SMU fully functioning
+ * for I2C communication which only true at this point.
+ *
+ * amdgpu_ras_recovery_init may fail, but the upper only cares the
+ * failure from bad gpu situation and stop amdgpu init process
+ * accordingly. For other failed cases, it will still release all
+ * the resource and print error message, rather than returning one
+ * negative value to upper level.
+ *
+ * Note: theoretically, this should be called before all vram allocations
+ * to protect retired page from abusing
+ */
+ r = amdgpu_ras_recovery_init(adev);
+ if (r)
+ goto init_failed;
+
+ /**
+ * In case of XGMI grab extra reference for reset domain for this device
+ */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (amdgpu_xgmi_add_device(adev) == 0) {
+ if (!amdgpu_sriov_vf(adev)) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ if (WARN_ON(!hive)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
+ }
+ }
+ }
+
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
+ /* Don't init kfd if whole hive need to be reset during init */
+ if (!adev->gmc.xgmi.pending_reset) {
+ kgd2kfd_init_zone_device(adev);
+ amdgpu_amdkfd_device_init(adev);
+ }
+
+ amdgpu_fru_get_product_info(adev);
+
+init_failed:
+
+ return r;
+}
+
+/**
+ * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes a reset magic value to the gart pointer in VRAM. The driver calls
+ * this function before a GPU reset. If the value is retained after a
+ * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ */
+static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
+{
+ memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+/**
+ * amdgpu_device_check_vram_lost - check if vram is valid
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Checks the reset magic value written to the gart pointer in VRAM.
+ * The driver calls this after a GPU reset to see if the contents of
+ * VRAM is lost or now.
+ * returns true if vram is lost, false if not.
+ */
+static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
+{
+ if (memcmp(adev->gart.ptr, adev->reset_magic,
+ AMDGPU_RESET_MAGIC_NUM))
+ return true;
+
+ if (!amdgpu_in_reset(adev))
+ return false;
+
+ /*
+ * For all ASICs with baco/mode1 reset, the VRAM is
+ * always assumed to be lost.
+ */
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_BACO:
+ case AMD_RESET_METHOD_MODE1:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * amdgpu_device_set_cg_state - set clockgating for amdgpu device
+ *
+ * @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * set_clockgating_state callbacks are run.
+ * Late initialization pass enabling clockgating for hardware IPs.
+ * Fini or suspend, pass disabling clockgating for hardware IPs.
+ * Returns 0 on success, negative error code on failure.
+ */
+
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ int i, j, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ /* skip CG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ /* skip CG for VCE/UVD, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
+ /* enable clockgating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ state);
+ if (r) {
+ DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state)
+{
+ int i, j, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ /* skip PG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ /* skip CG for VCE/UVD, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
+ adev->ip_blocks[i].version->funcs->set_powergating_state) {
+ /* enable powergating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ state);
+ if (r) {
+ DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ }
+ return 0;
+}
+
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+ struct amdgpu_gpu_instance *gpu_ins;
+ struct amdgpu_device *adev;
+ int i, ret = 0;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * MGPU fan boost feature should be enabled
+ * only when there are two or more dGPUs in
+ * the system
+ */
+ if (mgpu_info.num_dgpu < 2)
+ goto out;
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ gpu_ins = &(mgpu_info.gpu_ins[i]);
+ adev = gpu_ins->adev;
+ if (!(adev->flags & AMD_IS_APU) &&
+ !gpu_ins->mgpu_fan_enabled) {
+ ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+ if (ret)
+ break;
+
+ gpu_ins->mgpu_fan_enabled = 1;
+ }
+ }
+
+out:
+ mutex_unlock(&mgpu_info.mutex);
+
+ return ret;
+}
+
+/**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the late_init callbacks are run.
+ * late_init covers any special initialization that an IP requires
+ * after all of the have been initialized or something that needs to happen
+ * late in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+ int i = 0, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+ if (r) {
+ DRM_ERROR("late_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ r = amdgpu_ras_late_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_ras_late_init failed %d", r);
+ return r;
+ }
+
+ amdgpu_ras_set_error_query_ready(adev, true);
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ amdgpu_device_fill_reset_magic(adev);
+
+ r = amdgpu_device_enable_mgpu_fan_boost();
+ if (r)
+ DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
+ if (amdgpu_passthrough(adev) &&
+ ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+ adev->asic_type == CHIP_ALDEBARAN))
+ amdgpu_dpm_handle_passthrough_sbr(adev, true);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
+ AMDGPU_XGMI_PSTATE_MIN);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_smu_fini_early - smu hw_fini wrapper
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For ASICs need to disable SMC first
+ */
+static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ return;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
+ /* XXX handle errors */
+ if (r) {
+ DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ adev->ip_blocks[i].status.hw = false;
+ break;
+ }
+ }
+}
+
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].version->funcs->early_fini)
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+ if (r) {
+ DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ }
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ amdgpu_amdkfd_suspend(adev, false);
+
+ /* Workaroud for ASICs need to disable SMC first */
+ amdgpu_device_smu_fini_early(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
+ /* XXX handle errors */
+ if (r) {
+ DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_release_full_gpu(adev, false))
+ DRM_ERROR("failed to release exclusive mode on fini\n");
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run. hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+ amdgpu_virt_release_ras_err_handler_data(adev);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_remove_device(adev);
+
+ amdgpu_amdkfd_device_fini_sw(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ amdgpu_ucode_free_bo(adev);
+ amdgpu_free_static_csa(&adev->virt.csa_obj);
+ amdgpu_device_wb_fini(adev);
+ amdgpu_device_mem_scratch_fini(adev);
+ amdgpu_ib_pool_fini(adev);
+ }
+
+ r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
+ /* XXX handle errors */
+ if (r) {
+ DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ adev->ip_blocks[i].status.sw = false;
+ adev->ip_blocks[i].status.valid = false;
+ }
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->late_fini)
+ adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+ adev->ip_blocks[i].status.late_initialized = false;
+ }
+
+ amdgpu_ras_fini(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_delayed_init_work_handler - work handler for IB tests
+ *
+ * @work: work_struct.
+ */
+static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, delayed_init_work.work);
+ int r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
+static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
+
+ WARN_ON_ONCE(adev->gfx.gfx_off_state);
+ WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
+}
+
+/**
+ * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ /*
+ * Per PMFW team's suggestion, driver needs to handle gfxoff
+ * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
+ * scenario. Add the missing df cstate disablement here.
+ */
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+
+ /* displays are handled separately */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+ /* XXX handle errors */
+ if (r) {
+ DRM_ERROR("suspend of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+ /* PSP lost connection when err_event_athub occurs */
+ if (amdgpu_ras_intr_triggered() &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ adev->ip_blocks[i].status.hw = false;
+ continue;
+ }
+
+ /* skip unnecessary suspend if we do not initialize them yet */
+ if (adev->gmc.xgmi.pending_reset &&
+ !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
+ adev->ip_blocks[i].status.hw = false;
+ continue;
+ }
+
+ /* skip suspend of gfx/mes and psp for S0ix
+ * gfx is in gfxoff state, so on resume it will exit gfxoff just
+ * like at runtime. PSP is also part of the always on hardware
+ * so no need to suspend it.
+ */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
+ continue;
+
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->in_s0ix &&
+ (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
+ * These are in TMR, hence are expected to be reused by PSP-TOS to reload
+ * from this location and RLC Autoload automatically also gets loaded
+ * from here based on PMFW -> PSP message during re-init sequence.
+ * Therefore, the psp suspend & resume should be skipped to avoid destroy
+ * the TMR and reload FWs again for IMU enabled APU ASICs.
+ */
+ if (amdgpu_in_reset(adev) &&
+ (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ /* XXX handle errors */
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+ /* XXX handle errors */
+ if (r) {
+ DRM_ERROR("suspend of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ adev->ip_blocks[i].status.hw = false;
+ /* handle putting the SMC in the appropriate state */
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
+ if (r) {
+ DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ return r;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_full_gpu(adev, false);
+ }
+
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_suspend_phase2(adev);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return r;
+}
+
+static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ static enum amd_ip_block_type ip_order[] = {
+ AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
+ AMD_IP_BLOCK_TYPE_PSP,
+ AMD_IP_BLOCK_TYPE_IH,
+ };
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ int j;
+ struct amdgpu_ip_block *block;
+
+ block = &adev->ip_blocks[i];
+ block->status.hw = false;
+
+ for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
+
+ if (block->version->type != ip_order[j] ||
+ !block->status.valid)
+ continue;
+
+ r = block->version->funcs->hw_init(adev);
+ DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+ if (r)
+ return r;
+ block->status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ static enum amd_ip_block_type ip_order[] = {
+ AMD_IP_BLOCK_TYPE_SMC,
+ AMD_IP_BLOCK_TYPE_DCE,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_IP_BLOCK_TYPE_SDMA,
+ AMD_IP_BLOCK_TYPE_MES,
+ AMD_IP_BLOCK_TYPE_UVD,
+ AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_IP_BLOCK_TYPE_JPEG
+ };
+
+ for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+ int j;
+ struct amdgpu_ip_block *block;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ block = &adev->ip_blocks[j];
+
+ if (block->version->type != ip_order[i] ||
+ !block->status.valid ||
+ block->status.hw)
+ continue;
+
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+ r = block->version->funcs->resume(adev);
+ else
+ r = block->version->funcs->hw_init(adev);
+
+ DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+ if (r)
+ return r;
+ block->status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * COMMON, GMC, and IH. resume puts the hardware into a functional state
+ * after a suspend and updates the software state as necessary. This
+ * function is also used for restoring the GPU after a GPU reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
+
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+ * necessary. This function is also used for restoring the GPU after a GPU
+ * reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main resume function for hardware IPs. The hardware IPs
+ * are split into two resume functions because they are
+ * also used in recovering from a GPU reset and some additional
+ * steps need to be take between them. In this case (S3/S4) they are
+ * run sequentially.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_device_ip_resume_phase1(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_ip_resume_phase2(adev);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Query the VBIOS data tables to determine if the board supports SR-IOV.
+ */
+static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->is_atom_fw) {
+ if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ } else {
+ if (amdgpu_atombios_has_gpu_virtualization_table(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ }
+
+ if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
+ }
+}
+
+/**
+ * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
+ *
+ * @asic_type: AMD asic type
+ *
+ * Check if there is DC (new modesetting infrastructre) support for an asic.
+ * returns true if DC has support, false if not.
+ */
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+{
+ switch (asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#if defined(CONFIG_DRM_AMD_DC)
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ /*
+ * We have systems in the wild with these ASICs that require
+ * LVDS and VGA support which is not supported with DC.
+ *
+ * Fallback to the non-DC driver here by default so as not to
+ * cause regressions.
+ */
+#if defined(CONFIG_DRM_AMD_DC_SI)
+ return amdgpu_dc > 0;
+#else
+ return false;
+#endif
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ /*
+ * We have systems in the wild with these ASICs that require
+ * VGA support which is not supported with DC.
+ *
+ * Fallback to the non-DC driver here by default so as not to
+ * cause regressions.
+ */
+ return amdgpu_dc > 0;
+ default:
+ return amdgpu_dc != 0;
+#else
+ default:
+ if (amdgpu_dc > 0)
+ DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
+ return false;
+#endif
+ }
+}
+
+/**
+ * amdgpu_device_has_dc_support - check if dc is supported
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true for supported, false for not supported
+ */
+bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
+{
+ if (adev->enable_virtual_display ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+
+ return amdgpu_device_asic_has_dc_support(adev->asic_type);
+}
+
+static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
+{
+ struct amdgpu_device *adev =
+ container_of(__work, struct amdgpu_device, xgmi_reset_work);
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ /* It's a bug to not have a hive within this function */
+ if (WARN_ON(!hive))
+ return;
+
+ /*
+ * Use task barrier to synchronize all xgmi reset works across the
+ * hive. task_barrier_enter and task_barrier_exit will block
+ * until all the threads running the xgmi reset works reach
+ * those points. task_barrier_full will do both blocks.
+ */
+ if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+ task_barrier_enter(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ task_barrier_exit(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+ adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+ adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ } else {
+
+ task_barrier_full(&hive->tb);
+ adev->asic_reset_res = amdgpu_asic_reset(adev);
+ }
+
+fail:
+ if (adev->asic_reset_res)
+ DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
+ adev->asic_reset_res, adev_to_drm(adev)->unique);
+ amdgpu_put_xgmi_hive(hive);
+}
+
+static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /*
+ * By default timeout for non compute jobs is 10000
+ * and 60000 for compute jobs.
+ * In SR-IOV or passthrough mode, timeout for compute
+ * jobs are 60000 by default.
+ */
+ adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev))
+ adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
+ msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
+ else
+ adev->compute_timeout = msecs_to_jiffies(60000);
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+ /*
+ * There is only one value specified and
+ * it should apply to all non-compute jobs.
+ */
+ if (index == 1) {
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ adev->compute_timeout = adev->gfx_timeout;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
+ */
+static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+ adev->ram_is_direct_mapped = true;
+}
+
+static const struct attribute *amdgpu_dev_attributes[] = {
+ &dev_attr_pcie_replay_count.attr,
+ NULL
+};
+
+static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
+{
+ if (amdgpu_mcbp == 1)
+ adev->gfx.mcbp = true;
+ else if (amdgpu_mcbp == 0)
+ adev->gfx.mcbp = false;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.mcbp = true;
+
+ if (adev->gfx.mcbp)
+ DRM_INFO("MCBP is enabled\n");
+}
+
+/**
+ * amdgpu_device_init - initialize the driver
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: driver flags
+ *
+ * Initializes the driver info and hw (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver startup.
+ */
+int amdgpu_device_init(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct pci_dev *pdev = adev->pdev;
+ int r, i;
+ bool px = false;
+ u32 max_MBps;
+ int tmp;
+
+ adev->shutdown = false;
+ adev->flags = flags;
+
+ if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+ adev->asic_type = amdgpu_force_asic_type;
+ else
+ adev->asic_type = flags & AMD_ASIC_MASK;
+
+ adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
+ if (amdgpu_emu_mode == 1)
+ adev->usec_timeout *= 10;
+ adev->gmc.gart_size = 512 * 1024 * 1024;
+ adev->accel_working = false;
+ adev->num_rings = 0;
+ RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
+ adev->mman.buffer_funcs = NULL;
+ adev->mman.buffer_funcs_ring = NULL;
+ adev->vm_manager.vm_pte_funcs = NULL;
+ adev->vm_manager.vm_pte_num_scheds = 0;
+ adev->gmc.gmc_funcs = NULL;
+ adev->harvest_ip_mask = 0x0;
+ adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+ bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ adev->smc_rreg = &amdgpu_invalid_rreg;
+ adev->smc_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg = &amdgpu_invalid_rreg;
+ adev->pcie_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
+ adev->pciep_rreg = &amdgpu_invalid_rreg;
+ adev->pciep_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
+ adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
+ adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
+ adev->didt_rreg = &amdgpu_invalid_rreg;
+ adev->didt_wreg = &amdgpu_invalid_wreg;
+ adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+ adev->gc_cac_wreg = &amdgpu_invalid_wreg;
+ adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
+ adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
+
+ DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
+ amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
+
+ /* mutex initialization are all done here so we
+ * can recall function without having locking issues
+ */
+ mutex_init(&adev->firmware.mutex);
+ mutex_init(&adev->pm.mutex);
+ mutex_init(&adev->gfx.gpu_clock_mutex);
+ mutex_init(&adev->srbm_mutex);
+ mutex_init(&adev->gfx.pipe_reserve_mutex);
+ mutex_init(&adev->gfx.gfx_off_mutex);
+ mutex_init(&adev->gfx.partition_mutex);
+ mutex_init(&adev->grbm_idx_mutex);
+ mutex_init(&adev->mn_lock);
+ mutex_init(&adev->virt.vf_errors.lock);
+ hash_init(adev->mn_hash);
+ mutex_init(&adev->psp.mutex);
+ mutex_init(&adev->notifier_lock);
+ mutex_init(&adev->pm.stable_pstate_ctx_lock);
+ mutex_init(&adev->benchmark_mutex);
+
+ amdgpu_device_init_apu_flags(adev);
+
+ r = amdgpu_device_check_arguments(adev);
+ if (r)
+ return r;
+
+ spin_lock_init(&adev->mmio_idx_lock);
+ spin_lock_init(&adev->smc_idx_lock);
+ spin_lock_init(&adev->pcie_idx_lock);
+ spin_lock_init(&adev->uvd_ctx_idx_lock);
+ spin_lock_init(&adev->didt_idx_lock);
+ spin_lock_init(&adev->gc_cac_idx_lock);
+ spin_lock_init(&adev->se_cac_idx_lock);
+ spin_lock_init(&adev->audio_endpt_idx_lock);
+ spin_lock_init(&adev->mm_stats.lock);
+
+ INIT_LIST_HEAD(&adev->shadow_list);
+ mutex_init(&adev->shadow_list_lock);
+
+ INIT_LIST_HEAD(&adev->reset_list);
+
+ INIT_LIST_HEAD(&adev->ras_list);
+
+ INIT_DELAYED_WORK(&adev->delayed_init_work,
+ amdgpu_device_delayed_init_work_handler);
+ INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+ amdgpu_device_delay_enable_gfx_off);
+
+ INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+
+ adev->gfx.gfx_off_req_count = 1;
+ adev->gfx.gfx_off_residency = 0;
+ adev->gfx.gfx_off_entrycount = 0;
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ /*
+ * If throttling continues, logging will be performed every minute
+ * to avoid log flooding. "-1" is subtracted since the thermal
+ * throttling interrupt comes every second. Thus, the total logging
+ * interval is 59 seconds(retelimited printk interval) + 1(waiting
+ * for throttling interrupt) = 60 seconds.
+ */
+ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+ ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
+
+ /* Registers mapping */
+ /* TODO: block userspace mapping of io register */
+ if (adev->asic_type >= CHIP_BONAIRE) {
+ adev->rmmio_base = pci_resource_start(adev->pdev, 5);
+ adev->rmmio_size = pci_resource_len(adev->pdev, 5);
+ } else {
+ adev->rmmio_base = pci_resource_start(adev->pdev, 2);
+ adev->rmmio_size = pci_resource_len(adev->pdev, 2);
+ }
+
+ for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
+ atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
+
+ adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
+ if (!adev->rmmio)
+ return -ENOMEM;
+
+ DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
+ DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
+
+ /*
+ * Reset domain needs to be present early, before XGMI hive discovered
+ * (if any) and intitialized to use reset sem and in_gpu reset flag
+ * early on during init and before calling to RREG32.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
+
+ /* detect hw virtualization here */
+ amdgpu_detect_virtualization(adev);
+
+ amdgpu_device_get_pcie_info(adev);
+
+ r = amdgpu_device_get_job_timeout_settings(adev);
+ if (r) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return r;
+ }
+
+ /* early init functions */
+ r = amdgpu_device_ip_early_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_set_mcbp(adev);
+
+ /* Get rid of things like offb */
+ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
+ if (r)
+ return r;
+
+ /* Enable TMZ based on IP_VERSION */
+ amdgpu_gmc_tmz_set(adev);
+
+ amdgpu_gmc_noretry_set(adev);
+ /* Need to get xgmi info early to decide the reset behavior*/
+ if (adev->gmc.xgmi.supported) {
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
+ /* enable PCIE atomic ops */
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.fw_reserve.p_pf2vf)
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
+ * internal path natively support atomics, set have_atomics_support to true.
+ */
+ } else if ((adev->flags & AMD_IS_APU) &&
+ (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
+ adev->have_atomics_support = true;
+ } else {
+ adev->have_atomics_support =
+ !pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ }
+
+ if (!adev->have_atomics_support)
+ dev_info(adev->dev, "PCIE atomic ops is not supported\n");
+
+ /* doorbell bar mapping and doorbell index init*/
+ amdgpu_doorbell_init(adev);
+
+ if (amdgpu_emu_mode == 1) {
+ /* post the asic on emulation mode */
+ emu_soc_asic_init(adev);
+ goto fence_driver_init;
+ }
+
+ amdgpu_reset_init(adev);
+
+ /* detect if we are with an SRIOV vbios */
+ if (adev->bios)
+ amdgpu_device_detect_sriov_bios(adev);
+
+ /* check if we need to reset the asic
+ * E.g., driver was not cleanly unloaded previously, etc.
+ */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes) {
+ dev_info(adev->dev, "Pending hive reset.\n");
+ adev->gmc.xgmi.pending_reset = true;
+ /* Only need to init necessary block for SMU to handle the reset */
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
+ DRM_DEBUG("IP %s disabled for hw_init.\n",
+ adev->ip_blocks[i].version->funcs->name);
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+ } else {
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
+ }
+ }
+ }
+
+ /* Post card if necessary */
+ if (amdgpu_device_need_post(adev)) {
+ if (!adev->bios) {
+ dev_err(adev->dev, "no vBIOS found\n");
+ r = -EINVAL;
+ goto failed;
+ }
+ DRM_INFO("GPU posting now...\n");
+ r = amdgpu_device_asic_init(adev);
+ if (r) {
+ dev_err(adev->dev, "gpu post error!\n");
+ goto failed;
+ }
+ }
+
+ if (adev->bios) {
+ if (adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+ if (!amdgpu_device_has_dc_support(adev))
+ amdgpu_atombios_i2c_init(adev);
+ }
+ }
+
+fence_driver_init:
+ /* Fence driver */
+ r = amdgpu_fence_driver_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
+ goto failed;
+ }
+
+ /* init the mode config */
+ drm_mode_config_init(adev_to_drm(adev));
+
+ r = amdgpu_device_ip_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+ goto release_ras_con;
+ }
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ dev_info(adev->dev,
+ "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se,
+ adev->gfx.config.max_cu_per_sh,
+ adev->gfx.cu_info.number);
+
+ adev->accel_working = true;
+
+ amdgpu_vm_check_compute_bug(adev);
+
+ /* Initialize the buffer migration limit. */
+ if (amdgpu_moverate >= 0)
+ max_MBps = amdgpu_moverate;
+ else
+ max_MBps = 8; /* Allow 8 MB/s. */
+ /* Get a log2 for easy divisions. */
+ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ DRM_ERROR("registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped due to the
+ * gpu instance is counted less.
+ */
+ amdgpu_register_gpu_instance(adev);
+
+ /* enable clockgating, etc. after ib tests, etc. since some blocks require
+ * explicit gating rather than handling it automatically.
+ */
+ if (!adev->gmc.xgmi.pending_reset) {
+ r = amdgpu_device_ip_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+ goto release_ras_con;
+ }
+ /* must succeed. */
+ amdgpu_ras_resume(adev);
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ flush_delayed_work(&adev->delayed_init_work);
+ }
+
+ r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ amdgpu_fru_sysfs_init(adev);
+
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ r = amdgpu_pmu_init(adev);
+ if (r)
+ dev_err(adev->dev, "amdgpu_pmu_init failed\n");
+
+ /* Have stored pci confspace at hand for restore in sudden PCI error */
+ if (amdgpu_device_cache_pci_state(adev->pdev))
+ pci_restore_state(pdev);
+
+ /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
+ /* this will fail for cards that aren't VGA class devices, just
+ * ignore it
+ */
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
+
+ px = amdgpu_device_supports_px(ddev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
+ vga_switcheroo_register_client(adev->pdev,
+ &amdgpu_switcheroo_ops, px);
+
+ if (px)
+ vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+
+ if (adev->gmc.xgmi.pending_reset)
+ queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+ amdgpu_device_check_iommu_direct_map(adev);
+
+ return 0;
+
+release_ras_con:
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* failed in exclusive mode due to timeout */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_mmio_blocked(adev) &&
+ !amdgpu_virt_wait_reset(adev)) {
+ dev_err(adev->dev, "VF exclusive mode timeout\n");
+ /* Don't send request since VF is inactive. */
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ adev->virt.ops = NULL;
+ r = -EAGAIN;
+ }
+ amdgpu_release_ras_context(adev);
+
+failed:
+ amdgpu_vf_error_trans_all(adev);
+
+ return r;
+}
+
+static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
+{
+
+ /* Clear all CPU mappings pointing to this device */
+ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
+
+ /* Unmap all mapped bars - Doorbell, registers and VRAM */
+ amdgpu_doorbell_fini(adev);
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ /* Memory manager related */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+}
+
+/**
+ * amdgpu_device_fini_hw - tear down the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the driver info (all asics).
+ * Called at driver shutdown.
+ */
+void amdgpu_device_fini_hw(struct amdgpu_device *adev)
+{
+ dev_info(adev->dev, "amdgpu: finishing device.\n");
+ flush_delayed_work(&adev->delayed_init_work);
+ adev->shutdown = true;
+
+ /* make sure IB test finished before entering exclusive mode
+ * to avoid preemption on IB test
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_request_full_gpu(adev, false);
+ amdgpu_virt_fini_data_exchange(adev);
+ }
+
+ /* disable all interrupts */
+ amdgpu_irq_disable_all(adev);
+ if (adev->mode_info.mode_config_initialized) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
+ drm_helper_force_disable_all(adev_to_drm(adev));
+ else
+ drm_atomic_helper_shutdown(adev_to_drm(adev));
+ }
+ amdgpu_fence_driver_hw_fini(adev);
+
+ if (adev->mman.initialized)
+ drain_workqueue(adev->mman.bdev.wq);
+
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ amdgpu_fru_sysfs_fini(adev);
+
+ /* disable ras feature must before hw fini */
+ amdgpu_ras_pre_fini(adev);
+
+ amdgpu_device_ip_fini_early(adev);
+
+ amdgpu_irq_fini_hw(adev);
+
+ if (adev->mman.initialized)
+ ttm_device_clear_dma_mappings(&adev->mman.bdev);
+
+ amdgpu_gart_dummy_page_fini(adev);
+
+ if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ amdgpu_device_unmap_mmio(adev);
+
+}
+
+void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+{
+ int idx;
+ bool px;
+
+ amdgpu_fence_driver_sw_fini(adev);
+ amdgpu_device_ip_fini(adev);
+ amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
+ adev->accel_working = false;
+ dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+
+ amdgpu_reset_fini(adev);
+
+ /* free i2c buses */
+ if (!amdgpu_device_has_dc_support(adev))
+ amdgpu_i2c_fini(adev);
+
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+
+ kfree(adev->bios);
+ adev->bios = NULL;
+
+ px = amdgpu_device_supports_px(adev_to_drm(adev));
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
+ vga_switcheroo_unregister_client(adev->pdev);
+
+ if (px)
+ vga_switcheroo_fini_domain_pm_ops(adev->dev);
+
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_unregister(adev->pdev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ amdgpu_doorbell_fini(adev);
+ drm_dev_exit(idx);
+ }
+
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ amdgpu_pmu_fini(adev);
+ if (adev->mman.discovery_bin)
+ amdgpu_discovery_fini(adev);
+
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = NULL;
+
+ kfree(adev->pci_state);
+
+}
+
+/**
+ * amdgpu_device_evict_resources - evict device resources
+ * @adev: amdgpu device object
+ *
+ * Evicts all ttm device resources(vram BOs, gart table) from the lru list
+ * of the vram memory type. Mainly used for evicting device resources
+ * at suspend time.
+ *
+ */
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
+{
+ int ret;
+
+ /* No need to evict vram on APUs for suspend to ram or s2idle */
+ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+ return 0;
+
+ ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+ if (ret)
+ DRM_WARN("evicting device resources failed\n");
+ return ret;
+}
+
+/*
+ * Suspend & resume.
+ */
+/**
+ * amdgpu_device_suspend - initiate device suspend
+ *
+ * @dev: drm dev pointer
+ * @fbcon : notify the fbdev of suspend
+ *
+ * Puts the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ adev->in_suspend = true;
+
+ /* Evict the majority of BOs before grabbing the full access */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
+ DRM_WARN("smart shift update failed\n");
+
+ if (fbcon)
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+
+ cancel_delayed_work_sync(&adev->delayed_init_work);
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ amdgpu_ras_suspend(adev);
+
+ amdgpu_device_ip_suspend_phase1(adev);
+
+ if (!adev->in_s0ix)
+ amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ amdgpu_device_ip_suspend_phase2(adev);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_resume - initiate device resume
+ *
+ * @dev: drm dev pointer
+ * @fbcon : notify the fbdev of resume
+ *
+ * Bring the hw back to operating state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
+
+ /* post card */
+ if (amdgpu_device_need_post(adev)) {
+ r = amdgpu_device_asic_init(adev);
+ if (r)
+ dev_err(adev->dev, "amdgpu asic init failed\n");
+ }
+
+ r = amdgpu_device_ip_resume(adev);
+
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ goto exit;
+ }
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_late_init(adev);
+ if (r)
+ goto exit;
+
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+ if (!adev->in_s0ix) {
+ r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+ if (r)
+ goto exit;
+ }
+
+exit:
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+ }
+
+ if (r)
+ return r;
+
+ /* Make sure IB tests flushed */
+ flush_delayed_work(&adev->delayed_init_work);
+
+ if (fbcon)
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
+
+ amdgpu_ras_resume(adev);
+
+ if (adev->mode_info.num_crtc) {
+ /*
+ * Most of the connector probing functions try to acquire runtime pm
+ * refs to ensure that the GPU is powered on when connector polling is
+ * performed. Since we're calling this from a runtime PM callback,
+ * trying to acquire rpm refs will cause us to deadlock.
+ *
+ * Since we're guaranteed to be holding the rpm lock, it's safe to
+ * temporarily disable the rpm helpers so this doesn't deadlock us.
+ */
+#ifdef CONFIG_PM
+ dev->dev->power.disable_depth++;
+#endif
+ if (!adev->dc_enabled)
+ drm_helper_hpd_irq_event(dev);
+ else
+ drm_kms_helper_hotplug_event(dev);
+#ifdef CONFIG_PM
+ dev->dev->power.disable_depth--;
+#endif
+ }
+ adev->in_suspend = false;
+
+ if (adev->enable_mes)
+ amdgpu_mes_self_test(adev);
+
+ if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
+ DRM_WARN("smart shift update failed\n");
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_check_soft_reset - did soft reset succeed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and
+ * the check_soft_reset callbacks are run. check_soft_reset determines
+ * if the asic is still hung or not.
+ * Returns true if any of the IPs are still in a hung state, false if not.
+ */
+static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
+{
+ int i;
+ bool asic_hang = false;
+
+ if (amdgpu_sriov_vf(adev))
+ return true;
+
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->check_soft_reset)
+ adev->ip_blocks[i].status.hang =
+ adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+ if (adev->ip_blocks[i].status.hang) {
+ dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
+ asic_hang = true;
+ }
+ }
+ return asic_hang;
+}
+
+/**
+ * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary for a soft reset to succeed.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->pre_soft_reset) {
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
+ * reset is necessary to recover.
+ * Returns true if a full asic reset is required, false if not.
+ */
+static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ if (adev->ip_blocks[i].status.hang) {
+ dev_info(adev->dev, "Some block need full reset!\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_soft_reset - do a soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * soft_reset callbacks are run if the block is hung. soft_reset handles any
+ * IP specific hardware or software state changes that are necessary to soft
+ * reset the IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->soft_reset) {
+ r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_post_soft_reset - clean up from soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary after the IP has been soft reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->post_soft_reset)
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_recover_vram - Recover some VRAM contents
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Restores the contents of VRAM buffers from the shadows in GTT. Used to
+ * restore things like GPUVM page tables after a GPU reset where
+ * the contents of VRAM might be lost.
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
+{
+ struct dma_fence *fence = NULL, *next = NULL;
+ struct amdgpu_bo *shadow;
+ struct amdgpu_bo_vm *vmbo;
+ long r = 1, tmo;
+
+ if (amdgpu_sriov_runtime(adev))
+ tmo = msecs_to_jiffies(8000);
+ else
+ tmo = msecs_to_jiffies(100);
+
+ dev_info(adev->dev, "recover vram bo from shadow start\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
+ /* If vm is compute context or adev is APU, shadow will be NULL */
+ if (!vmbo->shadow)
+ continue;
+ shadow = vmbo->shadow;
+
+ /* No need to recover an evicted BO */
+ if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
+ shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
+ shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
+ continue;
+
+ r = amdgpu_bo_restore_shadow(shadow, &next);
+ if (r)
+ break;
+
+ if (fence) {
+ tmo = dma_fence_wait_timeout(fence, false, tmo);
+ dma_fence_put(fence);
+ fence = next;
+ if (tmo == 0) {
+ r = -ETIMEDOUT;
+ break;
+ } else if (tmo < 0) {
+ r = tmo;
+ break;
+ }
+ } else {
+ fence = next;
+ }
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+
+ if (fence)
+ tmo = dma_fence_wait_timeout(fence, false, tmo);
+ dma_fence_put(fence);
+
+ if (r < 0 || tmo <= 0) {
+ dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
+ return -EIO;
+ }
+
+ dev_info(adev->dev, "recover vram bo from shadow done\n");
+ return 0;
+}
+
+
+/**
+ * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
+ *
+ * @adev: amdgpu_device pointer
+ * @from_hypervisor: request from hypervisor
+ *
+ * do VF FLR and reinitialize Asic
+ * return 0 means succeeded otherwise failed
+ */
+static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+ bool from_hypervisor)
+{
+ int r;
+ struct amdgpu_hive_info *hive = NULL;
+ int retry_limit = 0;
+
+retry:
+ amdgpu_amdkfd_pre_reset(adev);
+
+ if (from_hypervisor)
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ else
+ r = amdgpu_virt_reset_gpu(adev);
+ if (r)
+ return r;
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+
+ /* some sw clean up VF needs to do before recover */
+ amdgpu_virt_post_reset(adev);
+
+ /* Resume IP prior to SMC */
+ r = amdgpu_device_ip_reinit_early_sriov(adev);
+ if (r)
+ goto error;
+
+ amdgpu_virt_init_data_exchange(adev);
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
+ /* now we are okay to resume SMC/CP/SDMA */
+ r = amdgpu_device_ip_reinit_late_sriov(adev);
+ if (r)
+ goto error;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ /* Update PSP FW topology after reset */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(hive, adev);
+
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+
+ if (!r) {
+ r = amdgpu_ib_ring_tests(adev);
+
+ amdgpu_amdkfd_post_reset(adev);
+ }
+
+error:
+ if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ amdgpu_inc_vram_lost(adev);
+ r = amdgpu_device_recover_vram(adev);
+ }
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ if (AMDGPU_RETRY_SRIOV_RESET(r)) {
+ if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
+ retry_limit++;
+ goto retry;
+ } else
+ DRM_ERROR("GPU reset retry is beyond the retry limit\n");
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_device_has_job_running - check if there is any job in mirror list
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * check if there is any job in mirror list
+ */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
+{
+ int i;
+ struct drm_sched_job *job;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ spin_lock(&ring->sched.job_list_lock);
+ job = list_first_entry_or_null(&ring->sched.pending_list,
+ struct drm_sched_job, list);
+ spin_unlock(&ring->sched.job_list_lock);
+ if (job)
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
+ * a hung GPU.
+ */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
+{
+
+ if (amdgpu_gpu_recovery == 0)
+ goto disabled;
+
+ /* Skip soft reset check in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(adev))
+ return true;
+
+ if (amdgpu_sriov_vf(adev))
+ return true;
+
+ if (amdgpu_gpu_recovery == -1) {
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
+ goto disabled;
+ default:
+ break;
+ }
+ }
+
+ return true;
+
+disabled:
+ dev_info(adev->dev, "GPU recovery disabled.\n");
+ return false;
+}
+
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ dev_info(adev->dev, "GPU mode1 reset\n");
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ goto mode1_reset_failed;
+
+ amdgpu_device_load_pci_state(adev->pdev);
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto mode1_reset_failed;
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ ret = -ETIMEDOUT;
+ goto mode1_reset_failed;
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return 0;
+
+mode1_reset_failed:
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ return ret;
+}
+
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int i, r = 0;
+ struct amdgpu_job *job = NULL;
+ bool need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+ if (reset_context->reset_req_dev == adev)
+ job = reset_context->job;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+ }
+
+ amdgpu_fence_driver_isr_toggle(adev, true);
+
+ /* block all schedulers and reset given job's ring */
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ /* Clear job fence from fence drv to avoid force_completion
+ * leave NULL and vm flush fence in fence drv
+ */
+ amdgpu_fence_driver_clear_job_fences(ring);
+
+ /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+ amdgpu_fence_driver_force_completion(ring);
+ }
+
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
+ if (job && job->vm)
+ drm_sched_increase_karma(&job->base);
+
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+ if (!amdgpu_sriov_vf(adev)) {
+
+ if (!need_full_reset)
+ need_full_reset = amdgpu_device_ip_need_full_reset(adev);
+
+ if (!need_full_reset && amdgpu_gpu_recovery &&
+ amdgpu_device_ip_check_soft_reset(adev)) {
+ amdgpu_device_ip_pre_soft_reset(adev);
+ r = amdgpu_device_ip_soft_reset(adev);
+ amdgpu_device_ip_post_soft_reset(adev);
+ if (r || amdgpu_device_ip_check_soft_reset(adev)) {
+ dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
+ need_full_reset = true;
+ }
+ }
+
+ if (need_full_reset)
+ r = amdgpu_device_ip_suspend(adev);
+ if (need_full_reset)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET,
+ &reset_context->flags);
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
+{
+ int i;
+
+ lockdep_assert_held(&adev->reset_domain->sem);
+
+ for (i = 0; i < adev->num_regs; i++) {
+ adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
+ trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
+ adev->reset_dump_reg_value[i]);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_DEV_COREDUMP
+static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
+ size_t count, void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_device *adev = data;
+ struct drm_print_iterator iter;
+ int i;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
+ if (adev->reset_task_info.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ adev->reset_task_info.process_name,
+ adev->reset_task_info.pid);
+
+ if (adev->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+ if (adev->num_regs) {
+ drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
+
+ for (i = 0; i < adev->num_regs; i++)
+ drm_printf(&p, "0x%08x: 0x%08x\n",
+ adev->reset_dump_reg_list[i],
+ adev->reset_dump_reg_value[i]);
+ }
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+}
+
+static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+
+ ktime_get_ts64(&adev->reset_time);
+ dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+}
+#endif
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset, vram_lost = false;
+ int r = 0;
+ bool gpu_reset_for_dev_remove = 0;
+
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+ amdgpu_reset_reg_dumps(tmp_adev);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ gpu_reset_for_dev_remove =
+ test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper links negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ tmp_adev->gmc.xgmi.pending_reset = false;
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
+ tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+ tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
+ }
+
+ amdgpu_ras_intr_cleared();
+ }
+
+ /* Since the mode1 reset affects base ip blocks, the
+ * phase1 ip blocks need to be resumed. Otherwise there
+ * will be a BIOS signature error and the psp bootloader
+ * can't load kdb on the next amdgpu install.
+ */
+ if (gpu_reset_for_dev_remove) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_device_ip_resume_phase1(tmp_adev);
+
+ goto end;
+ }
+
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ if (need_full_reset) {
+ /* post card */
+ r = amdgpu_device_asic_init(tmp_adev);
+ if (r) {
+ dev_warn(tmp_adev->dev, "asic atom init failed!");
+ } else {
+ dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+
+ r = amdgpu_device_ip_resume_phase1(tmp_adev);
+ if (r)
+ goto out;
+
+ vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+#ifdef CONFIG_DEV_COREDUMP
+ tmp_adev->reset_vram_lost = vram_lost;
+ memset(&tmp_adev->reset_task_info, 0,
+ sizeof(tmp_adev->reset_task_info));
+ if (reset_context->job && reset_context->job->vm)
+ tmp_adev->reset_task_info =
+ reset_context->job->vm->task_info;
+ amdgpu_reset_capture_coredumpm(tmp_adev);
+#endif
+ if (vram_lost) {
+ DRM_INFO("VRAM is lost due to GPU reset!\n");
+ amdgpu_inc_vram_lost(tmp_adev);
+ }
+
+ r = amdgpu_device_fw_loading(tmp_adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_ip_resume_phase2(tmp_adev);
+ if (r)
+ goto out;
+
+ if (vram_lost)
+ amdgpu_device_fill_reset_magic(tmp_adev);
+
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ if (!reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_add_device(tmp_adev);
+
+ r = amdgpu_device_ip_late_init(tmp_adev);
+ if (r)
+ goto out;
+
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
+
+ /*
+ * The GPU enters bad state once faulty pages
+ * by ECC has reached the threshold, and ras
+ * recovery is scheduled next. So add one check
+ * here to break recovery if it indeed exceeds
+ * bad page threshold, and remind user to
+ * retire this GPU or setting one bigger
+ * bad_page_threshold value to fix this once
+ * probing driver again.
+ */
+ if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
+ /* must succeed. */
+ amdgpu_ras_resume(tmp_adev);
+ } else {
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* Update PSP FW topology after reset */
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(
+ reset_context->hive, tmp_adev);
+ }
+ }
+
+out:
+ if (!r) {
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
+ need_full_reset = true;
+ r = -EAGAIN;
+ goto end;
+ }
+ }
+
+ if (!r)
+ r = amdgpu_device_recover_vram(tmp_adev);
+ else
+ tmp_adev->asic_reset_res = r;
+ }
+
+end:
+ if (need_full_reset)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ return r;
+}
+
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
+{
+
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_MODE1:
+ adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ adev->mp1_state = PP_MP1_STATE_RESET;
+ break;
+ default:
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ break;
+ }
+}
+
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
+{
+ amdgpu_vf_error_trans_all(adev);
+ adev->mp1_state = PP_MP1_STATE_NONE;
+}
+
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (p) {
+ pm_runtime_enable(&(p->dev));
+ pm_runtime_resume(&(p->dev));
+ }
+
+ pci_dev_put(p);
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+ enum amd_reset_method reset_method;
+ struct pci_dev *p = NULL;
+ u64 expires;
+
+ /*
+ * For now, only BACO and mode1 reset are confirmed
+ * to suffer the audio issue without proper suspended.
+ */
+ reset_method = amdgpu_asic_reset_method(adev);
+ if ((reset_method != AMD_RESET_METHOD_BACO) &&
+ (reset_method != AMD_RESET_METHOD_MODE1))
+ return -EINVAL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (!p)
+ return -ENODEV;
+
+ expires = pm_runtime_autosuspend_expiration(&(p->dev));
+ if (!expires)
+ /*
+ * If we cannot get the audio device autosuspend delay,
+ * a fixed 4S interval will be used. Considering 3S is
+ * the audio controller default autosuspend delay setting.
+ * 4S used here is guaranteed to cover that.
+ */
+ expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
+
+ while (!pm_runtime_status_suspended(&(p->dev))) {
+ if (!pm_runtime_suspend(&(p->dev)))
+ break;
+
+ if (expires < ktime_get_mono_fast_ns()) {
+ dev_warn(adev->dev, "failed to suspend display audio\n");
+ pci_dev_put(p);
+ /* TODO: abort the succeeding gpu reset? */
+ return -ETIMEDOUT;
+ }
+ }
+
+ pm_runtime_disable(&(p->dev));
+
+ pci_dev_put(p);
+ return 0;
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+#if defined(CONFIG_DEBUG_FS)
+ if (!amdgpu_sriov_vf(adev))
+ cancel_work(&adev->reset_work);
+#endif
+
+ if (adev->kfd.dev)
+ cancel_work(&adev->kfd.reset_work);
+
+ if (amdgpu_sriov_vf(adev))
+ cancel_work(&adev->virt.flr_work);
+
+ if (con && adev->ras_enabled)
+ cancel_work(&con->recovery_work);
+
+}
+
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do soft-reset or full-reset and reinitialize Asic
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list, *device_list_handle = NULL;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
+ bool need_emergency_restart = false;
+ bool audio_suspended = false;
+ bool gpu_reset_for_dev_remove = false;
+
+ gpu_reset_for_dev_remove =
+ test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ DRM_WARN("Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!\n",
+ need_emergency_restart ? "jobs stop":"reset");
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ /*
+ * Build list of devices to reset.
+ * In case we are in XGMI hive mode, resort the device list
+ * to put adev in the 1st position.
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (gpu_reset_for_dev_remove && adev->shutdown)
+ tmp_adev->shutdown = true;
+ }
+ if (!list_is_first(&adev->reset_list, &device_list))
+ list_rotate_to_front(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ } else {
+ list_add_tail(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+
+ amdgpu_device_set_mp1_state(tmp_adev);
+
+ /*
+ * Try to put the audio codec into suspend state
+ * before gpu reset started.
+ *
+ * Due to the power domain of the graphics device
+ * is shared with AZ power domain. Without this,
+ * we may change the audio hardware from behind
+ * the audio driver's back. That will trigger
+ * some audio codec errors.
+ */
+ if (!amdgpu_device_suspend_display_audio(tmp_adev))
+ audio_suspended = true;
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, false);
+
+ cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
+
+ if (!amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_pre_reset(tmp_adev);
+
+ /*
+ * Mark these ASICs to be reseted as untracked first
+ * And add them back after reset completed
+ */
+ amdgpu_unregister_gpu_instance(tmp_adev);
+
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
+
+ /* disable ras on ALL IPs */
+ if (!need_emergency_restart &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
+ amdgpu_ras_suspend(tmp_adev);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+ if (need_emergency_restart)
+ amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
+ }
+ atomic_inc(&tmp_adev->gpu_reset_counter);
+ }
+
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+retry: /* Rest of adevs pre asic reset from XGMI hive. */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ if (gpu_reset_for_dev_remove) {
+ /* Workaroud for ASICs need to disable SMC first */
+ amdgpu_device_smu_fini_early(tmp_adev);
+ }
+ r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
+ /*TODO Should we stop ?*/
+ if (r) {
+ dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
+ r, adev_to_drm(tmp_adev)->unique);
+ tmp_adev->asic_reset_res = r;
+ }
+
+ /*
+ * Drop all pending non scheduler resets. Scheduler resets
+ * were already dropped during drm_sched_stop
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
+ }
+
+ /* Actual ASIC resets if needed.*/
+ /* Host driver will handle XGMI hive reset for SRIOV */
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_reset_sriov(adev, job ? false : true);
+ if (r)
+ adev->asic_reset_res = r;
+
+ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
+ amdgpu_ras_resume(adev);
+ } else {
+ r = amdgpu_do_asic_reset(device_list_handle, reset_context);
+ if (r && r == -EAGAIN)
+ goto retry;
+
+ if (!r && gpu_reset_for_dev_remove)
+ goto recover_end;
+ }
+
+skip_hw_reset:
+
+ /* Post ASIC reset for all devs .*/
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ drm_sched_start(&ring->sched, true);
+ }
+
+ if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
+ amdgpu_mes_self_test(tmp_adev);
+
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
+ drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
+
+ if (tmp_adev->asic_reset_res)
+ r = tmp_adev->asic_reset_res;
+
+ tmp_adev->asic_reset_res = 0;
+
+ if (r) {
+ /* bad news, how to tell it to userspace ? */
+ dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
+ amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ } else {
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
+ if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
+ DRM_WARN("smart shift update failed\n");
+ }
+ }
+
+skip_sched_resume:
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* unlock kfd: SRIOV would do it separately */
+ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_post_reset(tmp_adev);
+
+ /* kfd_post_reset will do nothing if kfd device is not initialized,
+ * need to bring up kfd here if it's not be initialized before
+ */
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+
+ if (audio_suspended)
+ amdgpu_device_resume_display_audio(tmp_adev);
+
+ amdgpu_device_unset_mp1_state(tmp_adev);
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, true);
+ }
+
+recover_end:
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (r)
+ dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
+
+ atomic_set(&adev->reset_domain->reset_res, r);
+ return r;
+}
+
+/**
+ * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * and lanes) of the slot the device is in. Handles APUs and
+ * virtualized environments where PCIE config space may not be available.
+ */
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev;
+ enum pci_bus_speed speed_cap, platform_speed_cap;
+ enum pcie_link_width platform_link_width;
+
+ if (amdgpu_pcie_gen_cap)
+ adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
+
+ if (amdgpu_pcie_lane_cap)
+ adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
+
+ /* covers APUs as well */
+ if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
+ if (adev->pm.pcie_gen_mask == 0)
+ adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ if (adev->pm.pcie_mlw_mask == 0)
+ adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ return;
+ }
+
+ if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
+ return;
+
+ pcie_bandwidth_available(adev->pdev, NULL,
+ &platform_speed_cap, &platform_link_width);
+
+ if (adev->pm.pcie_gen_mask == 0) {
+ /* asic caps */
+ pdev = adev->pdev;
+ speed_cap = pcie_get_speed_cap(pdev);
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ } else {
+ if (speed_cap == PCIE_SPEED_32_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
+ else if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
+ }
+ /* platform caps */
+ if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ } else {
+ if (platform_speed_cap == PCIE_SPEED_32_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
+ else if (platform_speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (platform_speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (platform_speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+
+ }
+ }
+ if (adev->pm.pcie_mlw_mask == 0) {
+ if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ } else {
+ switch (platform_link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+
+ return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
+ !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask));
+#else
+ return false;
+#endif
+}
+
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_device_supports_baco(dev))
+ return -ENOTSUPP;
+
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!amdgpu_device_supports_baco(dev))
+ return -ENOTSUPP;
+
+ ret = amdgpu_dpm_baco_exit(adev);
+ if (ret)
+ return ret;
+
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ if (amdgpu_passthrough(adev) &&
+ adev->nbio.funcs->clear_doorbell_interrupt)
+ adev->nbio.funcs->clear_doorbell_interrupt(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_pci_error_detected - Called when a PCI error is detected.
+ * @pdev: PCI device struct
+ * @state: PCI channel state
+ *
+ * Description: Called when a PCI error is detected.
+ *
+ * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
+ */
+pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ DRM_WARN("No support for XGMI hive yet...");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ adev->pci_channel_state = state;
+
+ switch (state) {
+ case pci_channel_io_normal:
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ /* Fatal error, prepare for slot reset */
+ case pci_channel_io_frozen:
+ /*
+ * Locking adev->reset_domain->sem will prevent any external access
+ * to GPU during PCI error recovery
+ */
+ amdgpu_device_lock_reset_domain(adev->reset_domain);
+ amdgpu_device_set_mp1_state(adev);
+
+ /*
+ * Block any work scheduling as we do for regular GPU reset
+ * for the duration of the recovery
+ */
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ drm_sched_stop(&ring->sched, NULL);
+ }
+ atomic_inc(&adev->gpu_reset_counter);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ /* Permanent error, prepare for device removal */
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
+ * @pdev: pointer to PCI device
+ */
+pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
+{
+
+ DRM_INFO("PCI error: mmio enabled callback!!\n");
+
+ /* TODO - dump whatever for debugging purposes */
+
+ /* This called only if amdgpu_pci_error_detected returns
+ * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
+ * works, no need to reset slot.
+ */
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
+ * @pdev: PCI device struct
+ *
+ * Description: This routine is called by the pci error recovery
+ * code after the PCI slot has been reset, just before we
+ * should resume normal operations.
+ */
+pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r, i;
+ struct amdgpu_reset_context reset_context;
+ u32 memsize;
+ struct list_head device_list;
+
+ DRM_INFO("PCI error: slot reset callback!!\n");
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ INIT_LIST_HEAD(&device_list);
+ list_add_tail(&adev->reset_list, &device_list);
+
+ /* wait for asic to come out of reset */
+ msleep(500);
+
+ /* Restore PCI confspace */
+ amdgpu_device_load_pci_state(pdev);
+
+ /* confirm ASIC came out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ memsize = amdgpu_asic_get_config_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+ if (memsize == 0xffffffff) {
+ r = -ETIME;
+ goto out;
+ }
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+
+ adev->no_hw_access = true;
+ r = amdgpu_device_pre_asic_reset(adev, &reset_context);
+ adev->no_hw_access = false;
+ if (r)
+ goto out;
+
+ r = amdgpu_do_asic_reset(&device_list, &reset_context);
+
+out:
+ if (!r) {
+ if (amdgpu_device_cache_pci_state(adev->pdev))
+ pci_restore_state(adev->pdev);
+
+ DRM_INFO("PCIe error recovery succeeded\n");
+ } else {
+ DRM_ERROR("PCIe error recovery failed, err:%d", r);
+ amdgpu_device_unset_mp1_state(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ }
+
+ return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * amdgpu_pci_resume() - resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation.
+ */
+void amdgpu_pci_resume(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+
+ DRM_INFO("PCI error: resume callback!!\n");
+
+ /* Only continue execution for the case of pci_channel_io_frozen */
+ if (adev->pci_channel_state != pci_channel_io_frozen)
+ return;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ drm_sched_start(&ring->sched, true);
+ }
+
+ amdgpu_device_unset_mp1_state(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
+}
+
+bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ r = pci_save_state(pdev);
+ if (!r) {
+ kfree(adev->pci_state);
+
+ adev->pci_state = pci_store_saved_state(pdev);
+
+ if (!adev->pci_state) {
+ DRM_ERROR("Failed to store PCI saved state");
+ return false;
+ }
+ } else {
+ DRM_WARN("Failed to save PCI state, err:%d\n", r);
+ return false;
+ }
+
+ return true;
+}
+
+bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (!adev->pci_state)
+ return false;
+
+ r = pci_load_saved_state(pdev, adev->pci_state);
+
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ DRM_WARN("Failed to load PCI state, err:%d\n", r);
+ return false;
+ }
+
+ return true;
+}
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
+ if (ring && ring->funcs->emit_hdp_flush)
+ amdgpu_ring_emit_hdp_flush(ring);
+ else
+ amdgpu_asic_flush_hdp(adev, ring);
+}
+
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
+ amdgpu_asic_invalidate_hdp(adev, ring);
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+ return atomic_read(&adev->reset_domain->in_gpu_reset);
+}
+
+/**
+ * amdgpu_device_halt() - bring hardware to some kind of halt state
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Bring hardware to some kind of halt state so that no one can touch it
+ * any more. It will help to maintain error context when error occurred.
+ * Compare to a simple hang, the system will keep stable at least for SSH
+ * access. Then it should be trivial to inspect the hardware state and
+ * see what's going on. Implemented as following:
+ *
+ * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
+ * clears all CPU mappings to device, disallows remappings through page faults
+ * 2. amdgpu_irq_disable_all() disables all interrupts
+ * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
+ * 4. set adev->no_hw_access to avoid potential crashes after setp 5
+ * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
+ * 6. pci_disable_device() and pci_wait_for_pending_transaction()
+ * flush any in flight DMA operations
+ */
+void amdgpu_device_halt(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev = adev->pdev;
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ amdgpu_xcp_dev_unplug(adev);
+ drm_dev_unplug(ddev);
+
+ amdgpu_irq_disable_all(adev);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ adev->no_hw_access = true;
+
+ amdgpu_device_unmap_mmio(adev);
+
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang.
+ * Returns: NULL if we switched to the new gang or a reference to the current
+ * gang leader.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang)
+{
+ struct dma_fence *old = NULL;
+
+ do {
+ dma_fence_put(old);
+ rcu_read_lock();
+ old = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+
+ if (old == gang)
+ break;
+
+ if (!dma_fence_is_signaled(old))
+ return old;
+
+ } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+ old, gang) != old);
+
+ dma_fence_put(old);
+ return NULL;
+}
+
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* chips with display hardware */
+ return true;
+ default:
+ /* IP discovery */
+ if (!adev->ip_versions[DCE_HWIP][0] ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+ return true;
+ }
+}
+
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ uint32_t ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else
+ udelay(1);
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000..1538b2dbf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {
+ bool hash_64k;
+ bool hash_2m;
+ bool hash_1g;
+};
+
+struct amdgpu_df_funcs {
+ void (*sw_init)(struct amdgpu_device *adev);
+ void (*sw_fini)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u64 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_add);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_remove);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, uint64_t *count);
+ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val);
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_df {
+ struct amdgpu_df_hash_status hash_status;
+ const struct amdgpu_df_funcs *funcs;
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
new file mode 100644
index 000000000..68a901287
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -0,0 +1,2635 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_discovery.h"
+#include "soc15_hw_ip.h"
+#include "discovery.h"
+
+#include "soc15.h"
+#include "gfx_v9_0.h"
+#include "gfx_v9_4_3.h"
+#include "gmc_v9_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
+#include "df_v4_3.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
+#include "nbio_v7_9.h"
+#include "hdp_v4_0.h"
+#include "vega10_ih.h"
+#include "vega20_ih.h"
+#include "sdma_v4_0.h"
+#include "sdma_v4_4_2.h"
+#include "uvd_v7_0.h"
+#include "vce_v4_0.h"
+#include "vcn_v1_0.h"
+#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
+#include "smuio_v9_0.h"
+#include "gmc_v10_0.h"
+#include "gmc_v11_0.h"
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
+#include "nbio_v4_3.h"
+#include "nbio_v7_2.h"
+#include "nbio_v7_7.h"
+#include "hdp_v5_0.h"
+#include "hdp_v5_2.h"
+#include "hdp_v6_0.h"
+#include "nv.h"
+#include "soc21.h"
+#include "navi10_ih.h"
+#include "ih_v6_0.h"
+#include "ih_v6_1.h"
+#include "gfx_v10_0.h"
+#include "gfx_v11_0.h"
+#include "sdma_v5_0.h"
+#include "sdma_v5_2.h"
+#include "sdma_v6_0.h"
+#include "lsdma_v6_0.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v3_0.h"
+#include "jpeg_v3_0.h"
+#include "vcn_v4_0.h"
+#include "jpeg_v4_0.h"
+#include "vcn_v4_0_3.h"
+#include "jpeg_v4_0_3.h"
+#include "amdgpu_vkms.h"
+#include "mes_v10_1.h"
+#include "mes_v11_0.h"
+#include "smuio_v11_0.h"
+#include "smuio_v11_0_6.h"
+#include "smuio_v13_0.h"
+#include "smuio_v13_0_3.h"
+#include "smuio_v13_0_6.h"
+
+#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
+MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
+
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
+#define mmMM_INDEX 0x0
+#define mmMM_INDEX_HI 0x6
+#define mmMM_DATA 0x1
+
+static const char *hw_id_names[HW_ID_MAX] = {
+ [MP1_HWID] = "MP1",
+ [MP2_HWID] = "MP2",
+ [THM_HWID] = "THM",
+ [SMUIO_HWID] = "SMUIO",
+ [FUSE_HWID] = "FUSE",
+ [CLKA_HWID] = "CLKA",
+ [PWR_HWID] = "PWR",
+ [GC_HWID] = "GC",
+ [UVD_HWID] = "UVD",
+ [AUDIO_AZ_HWID] = "AUDIO_AZ",
+ [ACP_HWID] = "ACP",
+ [DCI_HWID] = "DCI",
+ [DMU_HWID] = "DMU",
+ [DCO_HWID] = "DCO",
+ [DIO_HWID] = "DIO",
+ [XDMA_HWID] = "XDMA",
+ [DCEAZ_HWID] = "DCEAZ",
+ [DAZ_HWID] = "DAZ",
+ [SDPMUX_HWID] = "SDPMUX",
+ [NTB_HWID] = "NTB",
+ [IOHC_HWID] = "IOHC",
+ [L2IMU_HWID] = "L2IMU",
+ [VCE_HWID] = "VCE",
+ [MMHUB_HWID] = "MMHUB",
+ [ATHUB_HWID] = "ATHUB",
+ [DBGU_NBIO_HWID] = "DBGU_NBIO",
+ [DFX_HWID] = "DFX",
+ [DBGU0_HWID] = "DBGU0",
+ [DBGU1_HWID] = "DBGU1",
+ [OSSSYS_HWID] = "OSSSYS",
+ [HDP_HWID] = "HDP",
+ [SDMA0_HWID] = "SDMA0",
+ [SDMA1_HWID] = "SDMA1",
+ [SDMA2_HWID] = "SDMA2",
+ [SDMA3_HWID] = "SDMA3",
+ [LSDMA_HWID] = "LSDMA",
+ [ISP_HWID] = "ISP",
+ [DBGU_IO_HWID] = "DBGU_IO",
+ [DF_HWID] = "DF",
+ [CLKB_HWID] = "CLKB",
+ [FCH_HWID] = "FCH",
+ [DFX_DAP_HWID] = "DFX_DAP",
+ [L1IMU_PCIE_HWID] = "L1IMU_PCIE",
+ [L1IMU_NBIF_HWID] = "L1IMU_NBIF",
+ [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR",
+ [L1IMU3_HWID] = "L1IMU3",
+ [L1IMU4_HWID] = "L1IMU4",
+ [L1IMU5_HWID] = "L1IMU5",
+ [L1IMU6_HWID] = "L1IMU6",
+ [L1IMU7_HWID] = "L1IMU7",
+ [L1IMU8_HWID] = "L1IMU8",
+ [L1IMU9_HWID] = "L1IMU9",
+ [L1IMU10_HWID] = "L1IMU10",
+ [L1IMU11_HWID] = "L1IMU11",
+ [L1IMU12_HWID] = "L1IMU12",
+ [L1IMU13_HWID] = "L1IMU13",
+ [L1IMU14_HWID] = "L1IMU14",
+ [L1IMU15_HWID] = "L1IMU15",
+ [WAFLC_HWID] = "WAFLC",
+ [FCH_USB_PD_HWID] = "FCH_USB_PD",
+ [PCIE_HWID] = "PCIE",
+ [PCS_HWID] = "PCS",
+ [DDCL_HWID] = "DDCL",
+ [SST_HWID] = "SST",
+ [IOAGR_HWID] = "IOAGR",
+ [NBIF_HWID] = "NBIF",
+ [IOAPIC_HWID] = "IOAPIC",
+ [SYSTEMHUB_HWID] = "SYSTEMHUB",
+ [NTBCCP_HWID] = "NTBCCP",
+ [UMC_HWID] = "UMC",
+ [SATA_HWID] = "SATA",
+ [USB_HWID] = "USB",
+ [CCXSEC_HWID] = "CCXSEC",
+ [XGMI_HWID] = "XGMI",
+ [XGBE_HWID] = "XGBE",
+ [MP0_HWID] = "MP0",
+};
+
+static int hw_id_map[MAX_HWIP] = {
+ [GC_HWIP] = GC_HWID,
+ [HDP_HWIP] = HDP_HWID,
+ [SDMA0_HWIP] = SDMA0_HWID,
+ [SDMA1_HWIP] = SDMA1_HWID,
+ [SDMA2_HWIP] = SDMA2_HWID,
+ [SDMA3_HWIP] = SDMA3_HWID,
+ [LSDMA_HWIP] = LSDMA_HWID,
+ [MMHUB_HWIP] = MMHUB_HWID,
+ [ATHUB_HWIP] = ATHUB_HWID,
+ [NBIO_HWIP] = NBIF_HWID,
+ [MP0_HWIP] = MP0_HWID,
+ [MP1_HWIP] = MP1_HWID,
+ [UVD_HWIP] = UVD_HWID,
+ [VCE_HWIP] = VCE_HWID,
+ [DF_HWIP] = DF_HWID,
+ [DCE_HWIP] = DMU_HWID,
+ [OSSSYS_HWIP] = OSSSYS_HWID,
+ [SMUIO_HWIP] = SMUIO_HWID,
+ [PWR_HWIP] = PWR_HWID,
+ [NBIF_HWIP] = NBIF_HWID,
+ [THM_HWIP] = THM_HWID,
+ [CLK_HWIP] = CLKA_HWID,
+ [UMC_HWIP] = UMC_HWID,
+ [XGMI_HWIP] = XGMI_HWID,
+ [DCI_HWIP] = DCI_HWID,
+ [PCIE_HWIP] = PCIE_HWID,
+};
+
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ u64 tmr_offset, tmr_size, pos;
+ void *discv_regn;
+ int ret;
+
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+
+ pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary)
+{
+ uint64_t vram_size;
+ u32 msg;
+ int i, ret = 0;
+
+ /* It can take up to a second for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power so by the time the OS loads this has long
+ * completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
+ if (dev_is_removable(&adev->pdev->dev)) {
+ for (i = 0; i < 1000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+
+ if (vram_size) {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->mman.discovery_tmr_size, false);
+ } else {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ }
+
+ return ret;
+}
+
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
+{
+ const struct firmware *fw;
+ const char *fw_name;
+ int r;
+
+ switch (amdgpu_discovery) {
+ case 2:
+ fw_name = FIRMWARE_IP_DISCOVERY;
+ break;
+ default:
+ dev_warn(adev->dev, "amdgpu_discovery is not set properly\n");
+ return -EINVAL;
+ }
+
+ r = request_firmware(&fw, fw_name, adev->dev);
+ if (r) {
+ dev_err(adev->dev, "can't load firmware \"%s\"\n",
+ fw_name);
+ return r;
+ }
+
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
+ release_firmware(fw);
+
+ return 0;
+}
+
+static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
+{
+ uint16_t checksum = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ checksum += data[i];
+
+ return checksum;
+}
+
+static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
+ uint16_t expected)
+{
+ return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
+}
+
+static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
+{
+ struct binary_header *bhdr;
+ bhdr = (struct binary_header *)binary;
+
+ return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
+}
+
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table of VCN config.
+ */
+ if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int amdgpu_discovery_init(struct amdgpu_device *adev)
+{
+ struct table_info *info;
+ struct binary_header *bhdr;
+ uint16_t offset;
+ uint16_t size;
+ uint16_t checksum;
+ int r;
+
+ adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
+ adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
+ if (!adev->mman.discovery_bin)
+ return -ENOMEM;
+
+ /* Read from file if it is the preferred option */
+ if (amdgpu_discovery == 2) {
+ dev_info(adev->dev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
+
+ if (r) {
+ dev_err(adev->dev, "failed to read ip discovery binary from file\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ } else {
+ r = amdgpu_discovery_read_binary_from_mem(
+ adev, adev->mman.discovery_bin);
+ if (r)
+ goto out;
+ }
+
+ /* check the ip discovery binary signature */
+ if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+ dev_err(adev->dev,
+ "get invalid ip discovery binary signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+
+ offset = offsetof(struct binary_header, binary_checksum) +
+ sizeof(bhdr->binary_checksum);
+ size = le16_to_cpu(bhdr->binary_size) - offset;
+ checksum = le16_to_cpu(bhdr->binary_checksum);
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ size, checksum)) {
+ dev_err(adev->dev, "invalid ip discovery binary checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ info = &bhdr->table_list[IP_DISCOVERY];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le16_to_cpu(ihdr->size), checksum)) {
+ dev_err(adev->dev, "invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[GC];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery gc table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(ghdr->size), checksum)) {
+ dev_err(adev->dev, "invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[HARVEST_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
+ dev_err(adev->dev, "invalid harvest data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[VCN_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid vcn data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[MALL_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (0 && offset) {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery mall table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid mall data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ return 0;
+
+out:
+ kfree(adev->mman.discovery_bin);
+ adev->mman.discovery_bin = NULL;
+
+ return r;
+}
+
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
+
+void amdgpu_discovery_fini(struct amdgpu_device *adev)
+{
+ amdgpu_discovery_sysfs_fini(adev);
+ kfree(adev->mman.discovery_bin);
+ adev->mman.discovery_bin = NULL;
+}
+
+static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
+{
+ if (ip->instance_number >= HWIP_MAX_INSTANCE) {
+ DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
+ ip->instance_number);
+ return -EINVAL;
+ }
+ if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
+ DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n",
+ le16_to_cpu(ip->hw_id));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count)
+{
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct ip_v4 *ip;
+ uint16_t die_offset, ip_offset, num_dies, num_ips;
+ int i, j;
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ /* scan harvest bit of all IP data structures */
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+
+ if (amdgpu_discovery_validate_ip(ip))
+ goto next_ip;
+
+ if (le16_to_cpu(ip->variant) == 1) {
+ switch (le16_to_cpu(ip->hw_id)) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ if (ip->instance_number == 0) {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ } else {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ }
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ default:
+ break;
+ }
+ }
+next_ip:
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ }
+ }
+}
+
+static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count,
+ uint32_t *umc_harvest_count)
+{
+ struct binary_header *bhdr;
+ struct harvest_table *harvest_info;
+ u16 offset;
+ int i;
+ uint32_t umc_harvest_config = 0;
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+
+ if (!offset) {
+ dev_err(adev->dev, "invalid harvest table offset\n");
+ return;
+ }
+
+ harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset);
+
+ for (i = 0; i < 32; i++) {
+ if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
+ break;
+
+ switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ adev->vcn.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+ adev->jpeg.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+
+ adev->vcn.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ adev->jpeg.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ case UMC_HWID:
+ umc_harvest_config |=
+ 1 << (le16_to_cpu(harvest_info->list[i].number_instance));
+ (*umc_harvest_count)++;
+ break;
+ case GC_HWID:
+ adev->gfx.xcc_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case SDMA0_HWID:
+ adev->sdma.sdma_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ default:
+ break;
+ }
+ }
+
+ adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) &
+ ~umc_harvest_config;
+}
+
+/* ================================================== */
+
+struct ip_hw_instance {
+ struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */
+
+ int hw_id;
+ u8 num_instance;
+ u8 major, minor, revision;
+ u8 harvest;
+
+ int num_base_addresses;
+ u32 base_addr[];
+};
+
+struct ip_hw_id {
+ struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */
+ int hw_id;
+};
+
+struct ip_die_entry {
+ struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */
+ u16 num_ips;
+};
+
+/* -------------------------------------------------- */
+
+struct ip_hw_instance_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf);
+};
+
+static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->hw_id);
+}
+
+static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_instance);
+}
+
+static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->major);
+}
+
+static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->minor);
+}
+
+static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->revision);
+}
+
+static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "0x%01X\n", ip_hw_instance->harvest);
+}
+
+static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_base_addresses);
+}
+
+static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ ssize_t res, at;
+ int ii;
+
+ for (res = at = ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) {
+ /* Here we satisfy the condition that, at + size <= PAGE_SIZE.
+ */
+ if (at + 12 > PAGE_SIZE)
+ break;
+ res = sysfs_emit_at(buf, at, "0x%08X\n",
+ ip_hw_instance->base_addr[ii]);
+ if (res <= 0)
+ break;
+ at += res;
+ }
+
+ return res < 0 ? res : at;
+}
+
+static struct ip_hw_instance_attr ip_hw_attr[] = {
+ __ATTR_RO(hw_id),
+ __ATTR_RO(num_instance),
+ __ATTR_RO(major),
+ __ATTR_RO(minor),
+ __ATTR_RO(revision),
+ __ATTR_RO(harvest),
+ __ATTR_RO(num_base_addresses),
+ __ATTR_RO(base_addr),
+};
+
+static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1];
+ATTRIBUTE_GROUPS(ip_hw_instance);
+
+#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
+#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
+
+static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+ struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr);
+
+ if (!ip_hw_attr->show)
+ return -EIO;
+
+ return ip_hw_attr->show(ip_hw_instance, buf);
+}
+
+static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
+ .show = ip_hw_instance_attr_show,
+};
+
+static void ip_hw_instance_release(struct kobject *kobj)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+
+ kfree(ip_hw_instance);
+}
+
+static const struct kobj_type ip_hw_instance_ktype = {
+ .release = ip_hw_instance_release,
+ .sysfs_ops = &ip_hw_instance_sysfs_ops,
+ .default_groups = ip_hw_instance_groups,
+};
+
+/* -------------------------------------------------- */
+
+#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
+
+static void ip_hw_id_release(struct kobject *kobj)
+{
+ struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj);
+
+ if (!list_empty(&ip_hw_id->hw_id_kset.list))
+ DRM_ERROR("ip_hw_id->hw_id_kset is not empty");
+ kfree(ip_hw_id);
+}
+
+static const struct kobj_type ip_hw_id_ktype = {
+ .release = ip_hw_id_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* -------------------------------------------------- */
+
+static void die_kobj_release(struct kobject *kobj);
+static void ip_disc_release(struct kobject *kobj);
+
+struct ip_die_entry_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
+};
+
+#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
+
+static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_die_entry->num_ips);
+}
+
+/* If there are more ip_die_entry attrs, other than the number of IPs,
+ * we can make this intro an array of attrs, and then initialize
+ * ip_die_entry_attrs in a loop.
+ */
+static struct ip_die_entry_attribute num_ips_attr =
+ __ATTR_RO(num_ips);
+
+static struct attribute *ip_die_entry_attrs[] = {
+ &num_ips_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */
+
+#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset)
+
+static ssize_t ip_die_entry_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr);
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!ip_die_entry_attr->show)
+ return -EIO;
+
+ return ip_die_entry_attr->show(ip_die_entry, buf);
+}
+
+static void ip_die_entry_release(struct kobject *kobj)
+{
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!list_empty(&ip_die_entry->ip_kset.list))
+ DRM_ERROR("ip_die_entry->ip_kset is not empty");
+ kfree(ip_die_entry);
+}
+
+static const struct sysfs_ops ip_die_entry_sysfs_ops = {
+ .show = ip_die_entry_attr_show,
+};
+
+static const struct kobj_type ip_die_entry_ktype = {
+ .release = ip_die_entry_release,
+ .sysfs_ops = &ip_die_entry_sysfs_ops,
+ .default_groups = ip_die_entry_groups,
+};
+
+static const struct kobj_type die_kobj_ktype = {
+ .release = die_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static const struct kobj_type ip_discovery_ktype = {
+ .release = ip_disc_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+struct ip_discovery_top {
+ struct kobject kobj; /* ip_discovery/ */
+ struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */
+ struct amdgpu_device *adev;
+};
+
+static void die_kobj_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(to_kset(kobj),
+ struct ip_discovery_top,
+ die_kset);
+ if (!list_empty(&ip_top->die_kset.list))
+ DRM_ERROR("ip_top->die_kset is not empty");
+}
+
+static void ip_disc_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top,
+ kobj);
+ struct amdgpu_device *adev = ip_top->adev;
+
+ adev->ip_top = NULL;
+ kfree(ip_top);
+}
+
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+ uint16_t hw_id, uint8_t inst)
+{
+ uint8_t harvest = 0;
+
+ /* Until a uniform way is figured, get mask based on hwid */
+ switch (hw_id) {
+ case VCN_HWID:
+ harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
+ break;
+ case DMU_HWID:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
+ harvest = 0x1;
+ break;
+ case UMC_HWID:
+ /* TODO: It needs another parsing; for now, ignore.*/
+ break;
+ case GC_HWID:
+ harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
+ break;
+ case SDMA0_HWID:
+ harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
+ break;
+ default:
+ break;
+ }
+
+ return harvest;
+}
+
+static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
+ struct ip_die_entry *ip_die_entry,
+ const size_t _ip_offset, const int num_ips,
+ bool reg_base_64)
+{
+ int ii, jj, kk, res;
+
+ DRM_DEBUG("num_ips:%d", num_ips);
+
+ /* Find all IPs of a given HW ID, and add their instance to
+ * #die/#hw_id/#instance/<attributes>
+ */
+ for (ii = 0; ii < HW_ID_MAX; ii++) {
+ struct ip_hw_id *ip_hw_id = NULL;
+ size_t ip_offset = _ip_offset;
+
+ for (jj = 0; jj < num_ips; jj++) {
+ struct ip_v4 *ip;
+ struct ip_hw_instance *ip_hw_instance;
+
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+ if (amdgpu_discovery_validate_ip(ip) ||
+ le16_to_cpu(ip->hw_id) != ii)
+ goto next_ip;
+
+ DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
+
+ /* We have a hw_id match; register the hw
+ * block if not yet registered.
+ */
+ if (!ip_hw_id) {
+ ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL);
+ if (!ip_hw_id)
+ return -ENOMEM;
+ ip_hw_id->hw_id = ii;
+
+ kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii);
+ ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset;
+ ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype;
+ res = kset_register(&ip_hw_id->hw_id_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_hw_id kset");
+ kfree(ip_hw_id);
+ return res;
+ }
+ if (hw_id_names[ii]) {
+ res = sysfs_create_link(&ip_die_entry->ip_kset.kobj,
+ &ip_hw_id->hw_id_kset.kobj,
+ hw_id_names[ii]);
+ if (res) {
+ DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n",
+ hw_id_names[ii],
+ kobject_name(&ip_die_entry->ip_kset.kobj));
+ }
+ }
+ }
+
+ /* Now register its instance.
+ */
+ ip_hw_instance = kzalloc(struct_size(ip_hw_instance,
+ base_addr,
+ ip->num_base_address),
+ GFP_KERNEL);
+ if (!ip_hw_instance) {
+ DRM_ERROR("no memory for ip_hw_instance");
+ return -ENOMEM;
+ }
+ ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
+ ip_hw_instance->num_instance = ip->instance_number;
+ ip_hw_instance->major = ip->major;
+ ip_hw_instance->minor = ip->minor;
+ ip_hw_instance->revision = ip->revision;
+ ip_hw_instance->harvest =
+ amdgpu_discovery_get_harvest_info(
+ adev, ip_hw_instance->hw_id,
+ ip_hw_instance->num_instance);
+ ip_hw_instance->num_base_addresses = ip->num_base_address;
+
+ for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
+ if (reg_base_64)
+ ip_hw_instance->base_addr[kk] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
+ else
+ ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+ }
+
+ kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
+ ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
+ res = kobject_add(&ip_hw_instance->kobj, NULL,
+ "%d", ip_hw_instance->num_instance);
+next_ip:
+ if (reg_base_64)
+ ip_offset += struct_size(ip, base_address_64,
+ ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct kset *die_kset = &adev->ip_top->die_kset;
+ u16 num_dies, die_offset, num_ips;
+ size_t ip_offset;
+ int ii, res;
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (ii = 0; ii < num_dies; ii++) {
+ struct ip_die_entry *ip_die_entry;
+
+ die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
+ dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ /* Add the die to the kset.
+ *
+ * dhdr->die_id == ii, which was checked in
+ * amdgpu_discovery_reg_base_init().
+ */
+
+ ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL);
+ if (!ip_die_entry)
+ return -ENOMEM;
+
+ ip_die_entry->num_ips = num_ips;
+
+ kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id));
+ ip_die_entry->ip_kset.kobj.kset = die_kset;
+ ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype;
+ res = kset_register(&ip_die_entry->ip_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_die_entry kset");
+ kfree(ip_die_entry);
+ return res;
+ }
+
+ amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
+{
+ struct kset *die_kset;
+ int res, ii;
+
+ if (!adev->mman.discovery_bin)
+ return -EINVAL;
+
+ adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
+ if (!adev->ip_top)
+ return -ENOMEM;
+
+ adev->ip_top->adev = adev;
+
+ res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype,
+ &adev->dev->kobj, "ip_discovery");
+ if (res) {
+ DRM_ERROR("Couldn't init and add ip_discovery/");
+ goto Err;
+ }
+
+ die_kset = &adev->ip_top->die_kset;
+ kobject_set_name(&die_kset->kobj, "%s", "die");
+ die_kset->kobj.parent = &adev->ip_top->kobj;
+ die_kset->kobj.ktype = &die_kobj_ktype;
+ res = kset_register(&adev->ip_top->die_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register die_kset");
+ goto Err;
+ }
+
+ for (ii = 0; ii < ARRAY_SIZE(ip_hw_attr); ii++)
+ ip_hw_instance_attrs[ii] = &ip_hw_attr[ii].attr;
+ ip_hw_instance_attrs[ii] = NULL;
+
+ res = amdgpu_discovery_sysfs_recurse(adev);
+
+ return res;
+Err:
+ kobject_put(&adev->ip_top->kobj);
+ return res;
+}
+
+/* -------------------------------------------------- */
+
+#define list_to_kobj(el) container_of(el, struct kobject, entry)
+
+static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id)
+{
+ struct list_head *el, *tmp;
+ struct kset *hw_id_kset;
+
+ hw_id_kset = &ip_hw_id->hw_id_kset;
+ spin_lock(&hw_id_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &hw_id_kset->list) {
+ list_del_init(el);
+ spin_unlock(&hw_id_kset->list_lock);
+ /* kobject is embedded in ip_hw_instance */
+ kobject_put(list_to_kobj(el));
+ spin_lock(&hw_id_kset->list_lock);
+ }
+ spin_unlock(&hw_id_kset->list_lock);
+ kobject_put(&ip_hw_id->hw_id_kset.kobj);
+}
+
+static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
+{
+ struct list_head *el, *tmp;
+ struct kset *ip_kset;
+
+ ip_kset = &ip_die_entry->ip_kset;
+ spin_lock(&ip_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &ip_kset->list) {
+ list_del_init(el);
+ spin_unlock(&ip_kset->list_lock);
+ amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el)));
+ spin_lock(&ip_kset->list_lock);
+ }
+ spin_unlock(&ip_kset->list_lock);
+ kobject_put(&ip_die_entry->ip_kset.kobj);
+}
+
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct list_head *el, *tmp;
+ struct kset *die_kset;
+
+ die_kset = &adev->ip_top->die_kset;
+ spin_lock(&die_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &die_kset->list) {
+ list_del_init(el);
+ spin_unlock(&die_kset->list_lock);
+ amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el)));
+ spin_lock(&die_kset->list_lock);
+ }
+ spin_unlock(&die_kset->list_lock);
+ kobject_put(&adev->ip_top->die_kset.kobj);
+ kobject_put(&adev->ip_top->kobj);
+}
+
+/* ================================================== */
+
+static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct ip_v4 *ip;
+ uint16_t die_offset;
+ uint16_t ip_offset;
+ uint16_t num_dies;
+ uint16_t num_ips;
+ uint8_t num_base_address;
+ int hw_ip;
+ int i, j, k;
+ int r;
+
+ r = amdgpu_discovery_init(adev);
+ if (r) {
+ DRM_ERROR("amdgpu_discovery_init failed\n");
+ return r;
+ }
+
+ adev->gfx.xcc_mask = 0;
+ adev->sdma.sdma_mask = 0;
+ adev->vcn.inst_mask = 0;
+ adev->jpeg.inst_mask = 0;
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ if (le16_to_cpu(dhdr->die_id) != i) {
+ DRM_ERROR("invalid die id %d, expected %d\n",
+ le16_to_cpu(dhdr->die_id), i);
+ return -EINVAL;
+ }
+
+ DRM_DEBUG("number of hardware IPs on die%d: %d\n",
+ le16_to_cpu(dhdr->die_id), num_ips);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+
+ if (amdgpu_discovery_validate_ip(ip))
+ goto next_ip;
+
+ num_base_address = ip->num_base_address;
+
+ DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)],
+ le16_to_cpu(ip->hw_id),
+ ip->instance_number,
+ ip->major, ip->minor,
+ ip->revision);
+
+ if (le16_to_cpu(ip->hw_id) == VCN_HWID) {
+ /* Bit [5:0]: original revision value
+ * Bit [7:6]: en/decode capability:
+ * 0b00 : VCN function normally
+ * 0b10 : encode is disabled
+ * 0b01 : decode is disabled
+ */
+ adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
+ ip->revision & 0xc0;
+ ip->revision &= ~0xc0;
+ if (adev->vcn.num_vcn_inst <
+ AMDGPU_MAX_VCN_INSTANCES) {
+ adev->vcn.num_vcn_inst++;
+ adev->vcn.inst_mask |=
+ (1U << ip->instance_number);
+ adev->jpeg.inst_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+ adev->vcn.num_vcn_inst + 1,
+ AMDGPU_MAX_VCN_INSTANCES);
+ }
+ }
+ if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+ if (adev->sdma.num_instances <
+ AMDGPU_MAX_SDMA_INSTANCES) {
+ adev->sdma.num_instances++;
+ adev->sdma.sdma_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+ adev->sdma.num_instances + 1,
+ AMDGPU_MAX_SDMA_INSTANCES);
+ }
+ }
+
+ if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
+ adev->gmc.num_umc++;
+ adev->umc.node_inst_num++;
+ }
+
+ if (le16_to_cpu(ip->hw_id) == GC_HWID)
+ adev->gfx.xcc_mask |=
+ (1U << ip->instance_number);
+
+ for (k = 0; k < num_base_address; k++) {
+ /*
+ * convert the endianness of base addresses in place,
+ * so that we don't need to convert them when accessing adev->reg_offset.
+ */
+ if (ihdr->base_addr_64_bit)
+ /* Truncate the 64bit base address from ip discovery
+ * and only store lower 32bit ip base in reg_offset[].
+ * Bits > 32 follows ASIC specific format, thus just
+ * discard them and handle it within specific ASIC.
+ * By this way reg_offset[] and related helpers can
+ * stay unchanged.
+ * The base address is in dwords, thus clear the
+ * highest 2 bits to store.
+ */
+ ip->base_address[k] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
+ else
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
+ }
+
+ for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
+ hw_id_map[hw_ip] != 0) {
+ DRM_DEBUG("set register base offset for %s\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)]);
+ adev->reg_offset[hw_ip][ip->instance_number] =
+ ip->base_address;
+ /* Instance support is somewhat inconsistent.
+ * SDMA is a good example. Sienna cichlid has 4 total
+ * SDMA instances, each enumerated separately (HWIDs
+ * 42, 43, 68, 69). Arcturus has 8 total SDMA instances,
+ * but they are enumerated as multiple instances of the
+ * same HWIDs (4x HWID 42, 4x HWID 43). UMC is another
+ * example. On most chips there are multiple instances
+ * with the same HWID.
+ */
+ adev->ip_versions[hw_ip][ip->instance_number] =
+ IP_VERSION(ip->major, ip->minor, ip->revision);
+ }
+ }
+
+next_ip:
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+{
+ int vcn_harvest_count = 0;
+ int umc_harvest_count = 0;
+
+ /*
+ * Harvest table does not fit Navi1x and legacy GPUs,
+ * so read harvest bit per IP data structure to set
+ * harvest configuration.
+ */
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) &&
+ adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) {
+ if ((adev->pdev->device == 0x731E &&
+ (adev->pdev->revision == 0xC6 ||
+ adev->pdev->revision == 0xC7)) ||
+ (adev->pdev->device == 0x7340 &&
+ adev->pdev->revision == 0xC9) ||
+ (adev->pdev->device == 0x7360 &&
+ adev->pdev->revision == 0xC7))
+ amdgpu_discovery_read_harvest_bit_per_ip(adev,
+ &vcn_harvest_count);
+ } else {
+ amdgpu_discovery_read_from_harvest_table(adev,
+ &vcn_harvest_count,
+ &umc_harvest_count);
+ }
+
+ amdgpu_discovery_harvest_config_quirk(adev);
+
+ if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ }
+
+ if (umc_harvest_count < adev->gmc.num_umc) {
+ adev->gmc.num_umc -= umc_harvest_count;
+ }
+}
+
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v1_1 v1_1;
+ struct gc_info_v1_2 v1_2;
+ struct gc_info_v2_0 v2;
+ struct gc_info_v2_1 v2_1;
+};
+
+static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ union gc_info *gc_info;
+ u16 offset;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[GC].offset);
+
+ if (!offset)
+ return 0;
+
+ gc_info = (union gc_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(gc_info->v1.header.version_major)) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ if (gc_info->v1.header.version_minor >= 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
+ adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
+ }
+ if (gc_info->v1.header.version_minor >= 2) {
+ adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
+ adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
+ adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc);
+ adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa);
+ adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
+ adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
+ }
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ if (gc_info->v2.header.version_minor == 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+ adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ le16_to_cpu(gc_info->v1.header.version_major),
+ le16_to_cpu(gc_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union mall_info {
+ struct mall_info_v1_0 v1;
+ struct mall_info_v2_0 v2;
+};
+
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ union mall_info *mall_info;
+ u32 u, mall_size_per_umc, m_s_present, half_use;
+ u64 mall_size;
+ u16 offset;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ mall_info = (union mall_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(mall_info->v1.header.version_major)) {
+ case 1:
+ mall_size = 0;
+ mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m);
+ m_s_present = le32_to_cpu(mall_info->v1.m_s_present);
+ half_use = le32_to_cpu(mall_info->v1.m_half_use);
+ for (u = 0; u < adev->gmc.num_umc; u++) {
+ if (m_s_present & (1 << u))
+ mall_size += mall_size_per_umc * 2;
+ else if (half_use & (1 << u))
+ mall_size += mall_size_per_umc / 2;
+ else
+ mall_size += mall_size_per_umc;
+ }
+ adev->gmc.mall_size = mall_size;
+ adev->gmc.m_half_use = half_use;
+ break;
+ case 2:
+ mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+ adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled MALL info table %d.%d\n",
+ le16_to_cpu(mall_info->v1.header.version_major),
+ le16_to_cpu(mall_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union vcn_info {
+ struct vcn_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ union vcn_info *vcn_info;
+ u16 offset;
+ int v;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
+ * but that may change in the future with new GPUs so keep this
+ * check for defensive purposes.
+ */
+ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
+ dev_err(adev->dev, "invalid vcn instances\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
+ case 1:
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * so this won't overflow.
+ */
+ for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
+ adev->vcn.vcn_codec_disable_mask[v] =
+ le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled VCN info table %d.%d\n",
+ le16_to_cpu(vcn_info->v1.header.version_major),
+ le16_to_cpu(vcn_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
+{
+ /* what IP to use for this? */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add common ip block(GC_HWIP:0x%x)\n",
+ adev->ip_versions[GC_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
+{
+ /* use GC or MMHUB IP version */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+ adev->ip_versions[GC_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 3, 0):
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ break;
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 1):
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
+ break;
+ case IP_VERSION(6, 1, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
+ adev->ip_versions[OSSSYS_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ break;
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 8):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block);
+ break;
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add psp ip block(MP0_HWIP:0x%x)\n",
+ adev->ip_versions[MP0_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 8):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add smu ip block(MP1_HWIP:0x%x)\n",
+ adev->ip_versions[MP1_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#if defined(CONFIG_DRM_AMD_DC)
+static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
+{
+ amdgpu_device_set_sriov_virtual_display(adev);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+}
+#endif
+
+static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
+{
+ if (adev->enable_virtual_display) {
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ return 0;
+ }
+
+ if (!amdgpu_device_has_dc_support(adev))
+ return 0;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (adev->ip_versions[DCE_HWIP][0]) {
+ switch (adev->ip_versions[DCE_HWIP][0]) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 3):
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
+ adev->ip_versions[DCE_HWIP][0]);
+ return -EINVAL;
+ }
+ } else if (adev->ip_versions[DCI_HWIP][0]) {
+ switch (adev->ip_versions[DCI_HWIP][0]) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ case IP_VERSION(12, 1, 0):
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCI_HWIP:0x%x)\n",
+ adev->ip_versions[DCI_HWIP][0]);
+ return -EINVAL;
+ }
+ }
+#endif
+ return 0;
+}
+
+static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ break;
+ case IP_VERSION(9, 4, 3):
+ if (!amdgpu_exp_hw_support)
+ return -EINVAL;
+ amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+ adev->ip_versions[GC_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 2):
+ case IP_VERSION(4, 4, 0):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 4, 2):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
+ break;
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 7):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
+ adev->ip_versions[SDMA0_HWIP][0]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ if (adev->ip_versions[VCE_HWIP][0]) {
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 2, 0):
+ /* UVD is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
+ adev->ip_versions[UVD_HWIP][0]);
+ return -EINVAL;
+ }
+ switch (adev->ip_versions[VCE_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 1, 0):
+ /* VCE is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
+ adev->ip_versions[VCE_HWIP][0]);
+ return -EINVAL;
+ }
+ } else {
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 2, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 3):
+ break;
+ case IP_VERSION(2, 5, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
+ break;
+ case IP_VERSION(2, 6, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_6_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_6_ip_block);
+ break;
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 16):
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 0, 2):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
+ break;
+ case IP_VERSION(3, 0, 33):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 3):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
+ adev->ip_versions[UVD_HWIP][0]);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ if (amdgpu_mes) {
+ amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
+ adev->enable_mes = true;
+ if (amdgpu_mes_kiq)
+ adev->enable_mes_kiq = true;
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ aqua_vanjaram_init_soc_config(adev);
+ break;
+ default:
+ break;
+ }
+}
+
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
+{
+ int r;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 4;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
+ break;
+ case CHIP_VEGA12:
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 4;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 2, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
+ break;
+ case CHIP_RAVEN:
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 1;
+ adev->vcn.num_vcn_inst = 1;
+ adev->gmc.num_umc = 2;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 5, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 1, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ } else {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ }
+ break;
+ case CHIP_VEGA20:
+ vega20_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 8;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
+ break;
+ case CHIP_ARCTURUS:
+ arct_reg_base_init(adev);
+ adev->sdma.num_instances = 8;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 8;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][2] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][3] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][4] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][5] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][6] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 2);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 4);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
+ break;
+ case CHIP_ALDEBARAN:
+ aldebaran_reg_base_init(adev);
+ adev->sdma.num_instances = 5;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 4;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][1] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][2] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][3] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][4] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 2);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 4);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 7, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
+ break;
+ default:
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r)
+ return -EINVAL;
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ break;
+ }
+
+ amdgpu_discovery_init_soc_config(adev);
+ amdgpu_discovery_sysfs_init(adev);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ adev->family = AMDGPU_FAMILY_AI;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ adev->family = AMDGPU_FAMILY_RV;
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->family = AMDGPU_FAMILY_NV;
+ break;
+ case IP_VERSION(10, 3, 1):
+ adev->family = AMDGPU_FAMILY_VGH;
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case IP_VERSION(10, 3, 3):
+ adev->family = AMDGPU_FAMILY_YC;
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->family = AMDGPU_FAMILY_GC_10_3_6;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->family = AMDGPU_FAMILY_GC_10_3_7;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_0_0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->family = AMDGPU_FAMILY_GC_11_0_1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->flags |= AMD_IS_APU;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(4, 8, 0))
+ adev->gmc.xgmi.supported = true;
+
+ /* set NBIO version */
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 2, 0):
+ adev->nbio.funcs = &nbio_v6_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ case IP_VERSION(2, 5, 0):
+ adev->nbio.funcs = &nbio_v7_0_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ adev->nbio.funcs = &nbio_v7_4_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 9, 0):
+ adev->nbio.funcs = &nbio_v7_9_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 2, 0):
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ case IP_VERSION(7, 5, 1):
+ adev->nbio.funcs = &nbio_v7_2_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg;
+ break;
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 3, 1):
+ case IP_VERSION(2, 3, 2):
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 3, 3):
+ adev->nbio.funcs = &nbio_v2_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ if (amdgpu_sriov_vf(adev))
+ adev->nbio.funcs = &nbio_v4_3_sriov_funcs;
+ else
+ adev->nbio.funcs = &nbio_v4_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ adev->nbio.funcs = &nbio_v7_7_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
+ break;
+ default:
+ break;
+ }
+
+ switch (adev->ip_versions[HDP_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ adev->hdp.funcs = &hdp_v4_0_funcs;
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 0, 4):
+ case IP_VERSION(5, 2, 0):
+ adev->hdp.funcs = &hdp_v5_0_funcs;
+ break;
+ case IP_VERSION(5, 2, 1):
+ adev->hdp.funcs = &hdp_v5_2_funcs;
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 1, 0):
+ adev->hdp.funcs = &hdp_v6_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (adev->ip_versions[DF_HWIP][0]) {
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(3, 6, 1):
+ case IP_VERSION(3, 6, 2):
+ adev->df.funcs = &df_v3_6_funcs;
+ break;
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 5, 2):
+ adev->df.funcs = &df_v1_7_funcs;
+ break;
+ case IP_VERSION(4, 3, 0):
+ adev->df.funcs = &df_v4_3_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (adev->ip_versions[SMUIO_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(10, 0, 2):
+ adev->smuio.funcs = &smuio_v9_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 8):
+ adev->smuio.funcs = &smuio_v11_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 6):
+ case IP_VERSION(11, 0, 10):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 9):
+ case IP_VERSION(13, 0, 10):
+ adev->smuio.funcs = &smuio_v11_0_6_funcs;
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->smuio.funcs = &smuio_v13_0_funcs;
+ break;
+ case IP_VERSION(13, 0, 3):
+ adev->smuio.funcs = &smuio_v13_0_3_funcs;
+ if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+ adev->flags |= AMD_IS_APU;
+ }
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(14, 0, 0):
+ adev->smuio.funcs = &smuio_v13_0_6_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (adev->ip_versions[LSDMA_HWIP][0]) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ adev->lsdma.funcs = &lsdma_v6_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_discovery_set_common_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gmc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ /* For SR-IOV, PSP needs to be initialized before IH */
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+ } else {
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_display_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_sdma_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ !amdgpu_sriov_vf(adev)) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_mes_ip_blocks(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
new file mode 100644
index 000000000..3a2f347bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DISCOVERY__
+#define __AMDGPU_DISCOVERY__
+
+#define DISCOVERY_TMR_SIZE (8 << 10)
+#define DISCOVERY_TMR_OFFSET (64 << 10)
+
+void amdgpu_discovery_fini(struct amdgpu_device *adev);
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
new file mode 100644
index 000000000..578aeba49
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -0,0 +1,1731 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include <asm/div64.h>
+
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_vblank.h>
+
+/**
+ * amdgpu_display_hotplug_work_func - work handler for display hotplug event
+ *
+ * @work: work struct pointer
+ *
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
+ */
+void amdgpu_display_hotplug_work_func(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ hotplug_work.work);
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ mutex_lock(&mode_config->mutex);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&mode_config->mutex);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(dev);
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
+static void amdgpu_display_flip_callback(struct dma_fence *f,
+ struct dma_fence_cb *cb)
+{
+ struct amdgpu_flip_work *work =
+ container_of(cb, struct amdgpu_flip_work, cb);
+
+ dma_fence_put(f);
+ schedule_work(&work->flip_work.work);
+}
+
+static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
+ struct dma_fence **f)
+{
+ struct dma_fence *fence = *f;
+
+ if (fence == NULL)
+ return false;
+
+ *f = NULL;
+
+ if (!dma_fence_add_callback(fence, &work->cb,
+ amdgpu_display_flip_callback))
+ return true;
+
+ dma_fence_put(fence);
+ return false;
+}
+
+static void amdgpu_display_flip_work_func(struct work_struct *__work)
+{
+ struct delayed_work *delayed_work =
+ container_of(__work, struct delayed_work, work);
+ struct amdgpu_flip_work *work =
+ container_of(delayed_work, struct amdgpu_flip_work, flip_work);
+ struct amdgpu_device *adev = work->adev;
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[work->crtc_id];
+
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ unsigned long flags;
+ unsigned int i;
+ int vpos, hpos;
+
+ for (i = 0; i < work->shared_count; ++i)
+ if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
+ return;
+
+ /* Wait until we're out of the vertical blank period before the one
+ * targeted by the flip
+ */
+ if (amdgpu_crtc->enabled &&
+ (amdgpu_display_get_crtc_scanoutpos(adev_to_drm(adev), work->crtc_id, 0,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode)
+ & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
+ (int)(work->target_vblank -
+ amdgpu_get_vblank_counter_kms(crtc)) > 0) {
+ schedule_delayed_work(&work->flip_work, usecs_to_jiffies(1000));
+ return;
+ }
+
+ /* We borrow the event spin lock for protecting flip_status */
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ /* Do the flip (mmio) */
+ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base, work->async);
+
+ /* Set the flip status */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+
+}
+
+/*
+ * Handle unpin events outside the interrupt handler proper.
+ */
+static void amdgpu_display_unpin_work_func(struct work_struct *__work)
+{
+ struct amdgpu_flip_work *work =
+ container_of(__work, struct amdgpu_flip_work, unpin_work);
+ int r;
+
+ /* unpin of the old buffer */
+ r = amdgpu_bo_reserve(work->old_abo, true);
+ if (likely(r == 0)) {
+ amdgpu_bo_unpin(work->old_abo);
+ amdgpu_bo_unreserve(work->old_abo);
+ } else
+ DRM_ERROR("failed to reserve buffer after flip\n");
+
+ amdgpu_bo_unref(&work->old_abo);
+ kfree(work->shared);
+ kfree(work);
+}
+
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_flip_work *work;
+ struct amdgpu_bo *new_abo;
+ unsigned long flags;
+ u64 tiling_flags;
+ int i, r;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (work == NULL)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
+ INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
+
+ work->event = event;
+ work->adev = adev;
+ work->crtc_id = amdgpu_crtc->crtc_id;
+ work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
+
+ /* schedule unpin of the old buffer */
+ obj = crtc->primary->fb->obj[0];
+
+ /* take a reference to the old object */
+ work->old_abo = gem_to_amdgpu_bo(obj);
+ amdgpu_bo_ref(work->old_abo);
+
+ obj = fb->obj[0];
+ new_abo = gem_to_amdgpu_bo(obj);
+
+ /* pin the new buffer */
+ r = amdgpu_bo_reserve(new_abo, false);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to reserve new abo buffer before flip\n");
+ goto cleanup;
+ }
+
+ if (!adev->enable_virtual_display) {
+ r = amdgpu_bo_pin(new_abo,
+ amdgpu_display_supported_domains(adev, new_abo->flags));
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to pin new abo buffer before flip\n");
+ goto unreserve;
+ }
+ }
+
+ r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", new_abo);
+ goto unpin;
+ }
+
+ r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+ &work->shared_count,
+ &work->shared);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to get fences for buffer\n");
+ goto unpin;
+ }
+
+ amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
+ amdgpu_bo_unreserve(new_abo);
+
+ if (!adev->enable_virtual_display)
+ work->base = amdgpu_bo_gpu_offset(new_abo);
+ work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
+ amdgpu_get_vblank_counter_kms(crtc);
+
+ /* we borrow the event spin lock for protecting flip_wrok */
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_NONE) {
+ DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ r = -EBUSY;
+ goto pflip_cleanup;
+ }
+
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
+ amdgpu_crtc->pflip_works = work;
+
+
+ DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+ /* update crtc fb */
+ crtc->primary->fb = fb;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ amdgpu_display_flip_work_func(&work->flip_work.work);
+ return 0;
+
+pflip_cleanup:
+ if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
+ DRM_ERROR("failed to reserve new abo in error path\n");
+ goto cleanup;
+ }
+unpin:
+ if (!adev->enable_virtual_display)
+ amdgpu_bo_unpin(new_abo);
+
+unreserve:
+ amdgpu_bo_unreserve(new_abo);
+
+cleanup:
+ amdgpu_bo_unref(&work->old_abo);
+ for (i = 0; i < work->shared_count; ++i)
+ dma_fence_put(work->shared[i]);
+ kfree(work->shared);
+ kfree(work);
+
+ return r;
+}
+
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct drm_device *dev;
+ struct amdgpu_device *adev;
+ struct drm_crtc *crtc;
+ bool active = false;
+ int ret;
+
+ if (!set || !set->crtc)
+ return -EINVAL;
+
+ dev = set->crtc->dev;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0)
+ goto out;
+
+ ret = drm_crtc_helper_set_config(set, ctx);
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+ if (crtc->enabled)
+ active = true;
+
+ pm_runtime_mark_last_busy(dev->dev);
+
+ adev = drm_to_adev(dev);
+ /* if we have active crtcs and we don't have a power ref,
+ * take the current one
+ */
+ if (active && !adev->have_disp_power_ref) {
+ adev->have_disp_power_ref = true;
+ return ret;
+ }
+ /* if we have no active crtcs, then go to
+ * drop the power ref we got before
+ */
+ if (!active && adev->have_disp_power_ref)
+ adev->have_disp_power_ref = false;
+out:
+ /* drop the power reference we got coming in here */
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+}
+
+static const char *encoder_names[41] = {
+ "NONE",
+ "INTERNAL_LVDS",
+ "INTERNAL_TMDS1",
+ "INTERNAL_TMDS2",
+ "INTERNAL_DAC1",
+ "INTERNAL_DAC2",
+ "INTERNAL_SDVOA",
+ "INTERNAL_SDVOB",
+ "SI170B",
+ "CH7303",
+ "CH7301",
+ "INTERNAL_DVO1",
+ "EXTERNAL_SDVOA",
+ "EXTERNAL_SDVOB",
+ "TITFP513",
+ "INTERNAL_LVTM1",
+ "VT1623",
+ "HDMI_SI1930",
+ "HDMI_INTERNAL",
+ "INTERNAL_KLDSCP_TMDS1",
+ "INTERNAL_KLDSCP_DVO1",
+ "INTERNAL_KLDSCP_DAC1",
+ "INTERNAL_KLDSCP_DAC2",
+ "SI178",
+ "MVPU_FPGA",
+ "INTERNAL_DDI",
+ "VT1625",
+ "HDMI_SI1932",
+ "DP_AN9801",
+ "DP_DP501",
+ "INTERNAL_UNIPHY",
+ "INTERNAL_KLDSCP_LVTMA",
+ "INTERNAL_UNIPHY1",
+ "INTERNAL_UNIPHY2",
+ "NUTMEG",
+ "TRAVIS",
+ "INTERNAL_VCE",
+ "INTERNAL_UNIPHY3",
+ "HDMI_ANX9805",
+ "INTERNAL_AMCLK",
+ "VIRTUAL",
+};
+
+static const char *hpd_names[6] = {
+ "HPD1",
+ "HPD2",
+ "HPD3",
+ "HPD4",
+ "HPD5",
+ "HPD6",
+};
+
+void amdgpu_display_print_display_setup(struct drm_device *dev)
+{
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector_list_iter iter;
+ uint32_t devices;
+ int i = 0;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ DRM_INFO("AMDGPU Display Connectors\n");
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ DRM_INFO("Connector %d:\n", i);
+ DRM_INFO(" %s\n", connector->name);
+ if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE)
+ DRM_INFO(" %s\n", hpd_names[amdgpu_connector->hpd.hpd]);
+ if (amdgpu_connector->ddc_bus) {
+ DRM_INFO(" DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ amdgpu_connector->ddc_bus->rec.mask_clk_reg,
+ amdgpu_connector->ddc_bus->rec.mask_data_reg,
+ amdgpu_connector->ddc_bus->rec.a_clk_reg,
+ amdgpu_connector->ddc_bus->rec.a_data_reg,
+ amdgpu_connector->ddc_bus->rec.en_clk_reg,
+ amdgpu_connector->ddc_bus->rec.en_data_reg,
+ amdgpu_connector->ddc_bus->rec.y_clk_reg,
+ amdgpu_connector->ddc_bus->rec.y_data_reg);
+ if (amdgpu_connector->router.ddc_valid)
+ DRM_INFO(" DDC Router 0x%x/0x%x\n",
+ amdgpu_connector->router.ddc_mux_control_pin,
+ amdgpu_connector->router.ddc_mux_state);
+ if (amdgpu_connector->router.cd_valid)
+ DRM_INFO(" Clock/Data Router 0x%x/0x%x\n",
+ amdgpu_connector->router.cd_mux_control_pin,
+ amdgpu_connector->router.cd_mux_state);
+ } else {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_VGA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVII ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVIA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ DRM_INFO(" DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n");
+ }
+ DRM_INFO(" Encoders:\n");
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ devices = amdgpu_encoder->devices & amdgpu_connector->devices;
+ if (devices) {
+ if (devices & ATOM_DEVICE_CRT1_SUPPORT)
+ DRM_INFO(" CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_CRT2_SUPPORT)
+ DRM_INFO(" CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_LCD1_SUPPORT)
+ DRM_INFO(" LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP1_SUPPORT)
+ DRM_INFO(" DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP2_SUPPORT)
+ DRM_INFO(" DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP3_SUPPORT)
+ DRM_INFO(" DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP4_SUPPORT)
+ DRM_INFO(" DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP5_SUPPORT)
+ DRM_INFO(" DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP6_SUPPORT)
+ DRM_INFO(" DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_TV1_SUPPORT)
+ DRM_INFO(" TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_CV_SUPPORT)
+ DRM_INFO(" CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ }
+ }
+ i++;
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux)
+{
+ u8 out = 0x0;
+ u8 buf[8];
+ int ret;
+ struct i2c_msg msgs[] = {
+ {
+ .addr = DDC_ADDR,
+ .flags = 0,
+ .len = 1,
+ .buf = &out,
+ },
+ {
+ .addr = DDC_ADDR,
+ .flags = I2C_M_RD,
+ .len = 8,
+ .buf = buf,
+ }
+ };
+
+ /* on hw with routers, select right port */
+ if (amdgpu_connector->router.ddc_valid)
+ amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
+
+ if (use_aux)
+ ret = i2c_transfer(&amdgpu_connector->ddc_bus->aux.ddc, msgs, 2);
+ else
+ ret = i2c_transfer(&amdgpu_connector->ddc_bus->adapter, msgs, 2);
+
+ if (ret != 2)
+ /* Couldn't find an accessible DDC on this connector */
+ return false;
+ /* Probe also for valid EDID header
+ * EDID header starts with:
+ * 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00.
+ * Only the first 6 bytes must be valid as
+ * drm_edid_block_valid() can fix the last 2 bytes
+ */
+ if (drm_edid_header_is_valid(buf) < 6) {
+ /* Couldn't find an accessible EDID on this
+ * connector
+ */
+ return false;
+ }
+ return true;
+}
+
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+ unsigned int flags, unsigned int color,
+ struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+ if (file)
+ return -ENOSYS;
+
+ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+ num_clips);
+}
+
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+};
+
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ .dirty = amdgpu_dirtyfb
+};
+
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags)
+{
+ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ /*
+ * if amdgpu_bo_support_uswc returns false it means that USWC mappings
+ * is not supported for this board. But this mapping is required
+ * to avoid hang caused by placement of scanout BO in GTT on certain
+ * APUs. So force the BO placement to VRAM in case this architecture
+ * will not allow USWC mappings.
+ * Also, don't allow GTT domain if the BO doesn't have USWC flag set.
+ */
+ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+ amdgpu_bo_support_uswc(bo_flags) &&
+ adev->dc_enabled &&
+ adev->mode_info.gpu_vm_support)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+#endif
+
+ return domain;
+}
+
+static const struct drm_format_info dcc_formats[] = {
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 2,
+ .cpp = { 2, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+};
+
+static const struct drm_format_info dcc_retile_formats[] = {
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 3,
+ .cpp = { 2, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+};
+
+static const struct drm_format_info *
+lookup_format_info(const struct drm_format_info formats[],
+ int num_formats, u32 format)
+{
+ int i;
+
+ for (i = 0; i < num_formats; i++) {
+ if (formats[i].format == format)
+ return &formats[i];
+ }
+
+ return NULL;
+}
+
+const struct drm_format_info *
+amdgpu_lookup_format_info(u32 format, uint64_t modifier)
+{
+ if (!IS_AMD_FMT_MOD(modifier))
+ return NULL;
+
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
+ return lookup_format_info(dcc_retile_formats,
+ ARRAY_SIZE(dcc_retile_formats),
+ format);
+
+ if (AMD_FMT_MOD_GET(DCC, modifier))
+ return lookup_format_info(dcc_formats, ARRAY_SIZE(dcc_formats),
+ format);
+
+ /* returning NULL will cause the default format structs to be used. */
+ return NULL;
+}
+
+
+/*
+ * Tries to extract the renderable DCC offset from the opaque metadata attached
+ * to the buffer.
+ */
+static int
+extract_render_dcc_offset(struct amdgpu_device *adev,
+ struct drm_gem_object *obj,
+ uint64_t *offset)
+{
+ struct amdgpu_bo *rbo;
+ int r = 0;
+ uint32_t metadata[10]; /* Something that fits a descriptor + header. */
+ uint32_t size;
+
+ rbo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(rbo, false);
+
+ if (unlikely(r)) {
+ /* Don't show error message when returning -ERESTARTSYS */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Unable to reserve buffer: %d\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_get_metadata(rbo, metadata, sizeof(metadata), &size, NULL);
+ amdgpu_bo_unreserve(rbo);
+
+ if (r)
+ return r;
+
+ /*
+ * The first word is the metadata version, and we need space for at least
+ * the version + pci vendor+device id + 8 words for a descriptor.
+ */
+ if (size < 40 || metadata[0] != 1)
+ return -EINVAL;
+
+ if (adev->family >= AMDGPU_FAMILY_NV) {
+ /* resource word 6/7 META_DATA_ADDRESS{_LO} */
+ *offset = ((u64)metadata[9] << 16u) |
+ ((metadata[8] & 0xFF000000u) >> 16);
+ } else {
+ /* resource word 5/7 META_DATA_ADDRESS */
+ *offset = ((u64)metadata[9] << 8u) |
+ ((u64)(metadata[7] & 0x1FE0000u) << 23);
+ }
+
+ return 0;
+}
+
+static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
+{
+ struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
+ uint64_t modifier = 0;
+ int num_pipes = 0;
+ int num_pkrs = 0;
+
+ num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+ num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int swizzle = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE);
+ bool has_xor = swizzle >= 16;
+ int block_size_bits;
+ int version;
+ int pipe_xor_bits = 0;
+ int bank_xor_bits = 0;
+ int packers = 0;
+ int rb = 0;
+ int pipes = ilog2(num_pipes);
+ uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
+
+ switch (swizzle >> 2) {
+ case 0: /* 256B */
+ block_size_bits = 8;
+ break;
+ case 1: /* 4KiB */
+ case 5: /* 4KiB _X */
+ block_size_bits = 12;
+ break;
+ case 2: /* 64KiB */
+ case 4: /* 64 KiB _T */
+ case 6: /* 64 KiB _X */
+ block_size_bits = 16;
+ break;
+ case 7: /* 256 KiB */
+ block_size_bits = 18;
+ break;
+ default:
+ /* RESERVED or VAR */
+ return -EINVAL;
+ }
+
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX11;
+ else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
+ else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX10;
+ else
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+
+ switch (swizzle & 3) {
+ case 0: /* Z microtiling */
+ return -EINVAL;
+ case 1: /* S microtiling */
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (!has_xor)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
+ break;
+ case 2:
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (!has_xor && afb->base.format->cpp[0] != 4)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
+ break;
+ case 3:
+ break;
+ }
+
+ if (has_xor) {
+ if (num_pipes == num_pkrs && num_pkrs == 0) {
+ DRM_ERROR("invalid number of pipes and packers\n");
+ return -EINVAL;
+ }
+
+ switch (version) {
+ case AMD_FMT_MOD_TILE_VER_GFX11:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = min(block_size_bits - 8 - pipe_xor_bits,
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs));
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX9:
+ rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) +
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se);
+ pipe_xor_bits = min(block_size_bits - 8, pipes +
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_se));
+ bank_xor_bits = min(block_size_bits - 8 - pipe_xor_bits,
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_banks));
+ break;
+ }
+ }
+
+ modifier = AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) |
+ AMD_FMT_MOD_SET(TILE_VERSION, version) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, packers);
+
+ if (dcc_offset != 0) {
+ bool dcc_i64b = AMDGPU_TILING_GET(afb->tiling_flags, DCC_INDEPENDENT_64B) != 0;
+ bool dcc_i128b = version >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
+ const struct drm_format_info *format_info;
+ u64 render_dcc_offset;
+
+ /* Enable constant encode on RAVEN2 and later. */
+ bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->external_rev_id >= 0x81)) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0);
+
+ int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
+ dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
+ AMD_FMT_MOD_DCC_BLOCK_256B;
+
+ modifier |= AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, dcc_constant_encode) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, dcc_i64b) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, dcc_i128b) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_cblock_size);
+
+ afb->base.offsets[1] = dcc_offset * 256 + afb->base.offsets[0];
+ afb->base.pitches[1] =
+ AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1;
+
+ /*
+ * If the userspace driver uses retiling the tiling flags do not contain
+ * info on the renderable DCC buffer. Luckily the opaque metadata contains
+ * the info so we can try to extract it. The kernel does not use this info
+ * but we should convert it to a modifier plane for getfb2, so the
+ * userspace driver that gets it doesn't have to juggle around another DCC
+ * plane internally.
+ */
+ if (extract_render_dcc_offset(adev, afb->base.obj[0],
+ &render_dcc_offset) == 0 &&
+ render_dcc_offset != 0 &&
+ render_dcc_offset != afb->base.offsets[1] &&
+ render_dcc_offset < UINT_MAX) {
+ uint32_t dcc_block_bits; /* of base surface data */
+
+ modifier |= AMD_FMT_MOD_SET(DCC_RETILE, 1);
+ afb->base.offsets[2] = render_dcc_offset;
+
+ if (adev->family >= AMDGPU_FAMILY_NV) {
+ int extra_pipe = 0;
+
+ if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) &&
+ pipes == packers && pipes > 1)
+ extra_pipe = 1;
+
+ dcc_block_bits = max(20, 16 + pipes + extra_pipe);
+ } else {
+ modifier |= AMD_FMT_MOD_SET(RB, rb) |
+ AMD_FMT_MOD_SET(PIPE, pipes);
+ dcc_block_bits = max(20, 18 + rb);
+ }
+
+ dcc_block_bits -= ilog2(afb->base.format->cpp[0]);
+ afb->base.pitches[2] = ALIGN(afb->base.width,
+ 1u << ((dcc_block_bits + 1) / 2));
+ }
+ format_info = amdgpu_lookup_format_info(afb->base.format->format,
+ modifier);
+ if (!format_info)
+ return -EINVAL;
+
+ afb->base.format = format_info;
+ }
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0;
+}
+
+/* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */
+static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
+{
+ u64 micro_tile_mode;
+
+ /* Zero swizzle mode means linear */
+ if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
+ return 0;
+
+ micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
+ switch (micro_tile_mode) {
+ case 0: /* DISPLAY */
+ case 3: /* RENDER */
+ return 0;
+ default:
+ drm_dbg_kms(afb->base.dev,
+ "Micro tile mode %llu not supported for scanout\n",
+ micro_tile_mode);
+ return -EINVAL;
+ }
+}
+
+static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
+ unsigned int *width, unsigned int *height)
+{
+ unsigned int cpp_log2 = ilog2(cpp);
+ unsigned int pixel_log2 = block_log2 - cpp_log2;
+ unsigned int width_log2 = (pixel_log2 + 1) / 2;
+ unsigned int height_log2 = pixel_log2 - width_log2;
+
+ *width = 1 << width_log2;
+ *height = 1 << height_log2;
+}
+
+static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
+ bool pipe_aligned)
+{
+ unsigned int ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+
+ switch (ver) {
+ case AMD_FMT_MOD_TILE_VER_GFX9: {
+ /*
+ * TODO: for pipe aligned we may need to check the alignment of the
+ * total size of the surface, which may need to be bigger than the
+ * natural alignment due to some HW workarounds
+ */
+ return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
+ }
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ case AMD_FMT_MOD_TILE_VER_GFX11: {
+ int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+
+ if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
+ ++pipes_log2;
+
+ return max(8 + (pipe_aligned ? pipes_log2 : 0), 12);
+ }
+ default:
+ return 0;
+ }
+}
+
+static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane,
+ const struct drm_format_info *format,
+ unsigned int block_width, unsigned int block_height,
+ unsigned int block_size_log2)
+{
+ unsigned int width = rfb->base.width /
+ ((plane && plane < format->num_planes) ? format->hsub : 1);
+ unsigned int height = rfb->base.height /
+ ((plane && plane < format->num_planes) ? format->vsub : 1);
+ unsigned int cpp = plane < format->num_planes ? format->cpp[plane] : 1;
+ unsigned int block_pitch = block_width * cpp;
+ unsigned int min_pitch = ALIGN(width * cpp, block_pitch);
+ unsigned int block_size = 1 << block_size_log2;
+ uint64_t size;
+
+ if (rfb->base.pitches[plane] % block_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is not a multiple of block pitch %d\n",
+ rfb->base.pitches[plane], plane, block_pitch);
+ return -EINVAL;
+ }
+ if (rfb->base.pitches[plane] < min_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is less than minimum pitch %d\n",
+ rfb->base.pitches[plane], plane, min_pitch);
+ return -EINVAL;
+ }
+
+ /* Force at least natural alignment. */
+ if (rfb->base.offsets[plane] % block_size) {
+ drm_dbg_kms(rfb->base.dev,
+ "offset 0x%x for plane %d is not a multiple of block pitch 0x%x\n",
+ rfb->base.offsets[plane], plane, block_size);
+ return -EINVAL;
+ }
+
+ size = rfb->base.offsets[plane] +
+ (uint64_t)rfb->base.pitches[plane] / block_pitch *
+ block_size * DIV_ROUND_UP(height, block_height);
+
+ if (rfb->base.obj[0]->size < size) {
+ drm_dbg_kms(rfb->base.dev,
+ "BO size 0x%zx is less than 0x%llx required for plane %d\n",
+ rfb->base.obj[0]->size, size, plane);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+{
+ const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format);
+ uint64_t modifier = rfb->base.modifier;
+ int ret;
+ unsigned int i, block_width, block_height, block_size_log2;
+
+ if (rfb->base.dev->mode_config.fb_modifiers_not_supported)
+ return 0;
+
+ for (i = 0; i < format_info->num_planes; ++i) {
+ if (modifier == DRM_FORMAT_MOD_LINEAR) {
+ block_width = 256 / format_info->cpp[i];
+ block_height = 1;
+ block_size_log2 = 8;
+ } else {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch ((swizzle & ~3) + 1) {
+ case DC_SW_256B_S:
+ block_size_log2 = 8;
+ break;
+ case DC_SW_4KB_S:
+ case DC_SW_4KB_S_X:
+ block_size_log2 = 12;
+ break;
+ case DC_SW_64KB_S:
+ case DC_SW_64KB_S_T:
+ case DC_SW_64KB_S_X:
+ block_size_log2 = 16;
+ break;
+ case DC_SW_VAR_S_X:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ }
+
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ if (AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+ block_size_log2 = get_dcc_block_size(modifier, false, false);
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height,
+ block_size_log2);
+ if (ret)
+ return ret;
+
+ ++i;
+ block_size_log2 = get_dcc_block_size(modifier, true, true);
+ } else {
+ bool pipe_aligned = AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
+
+ block_size_log2 = get_dcc_block_size(modifier, true, pipe_aligned);
+ }
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
+ uint64_t *tiling_flags, bool *tmz_surface)
+{
+ struct amdgpu_bo *rbo;
+ int r;
+
+ if (!amdgpu_fb) {
+ *tiling_flags = 0;
+ *tmz_surface = false;
+ return 0;
+ }
+
+ rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]);
+ r = amdgpu_bo_reserve(rbo, false);
+
+ if (unlikely(r)) {
+ /* Don't show error message when returning -ERESTARTSYS */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Unable to reserve buffer: %d\n", r);
+ return r;
+ }
+
+ if (tiling_flags)
+ amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+
+ if (tmz_surface)
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+
+ amdgpu_bo_unreserve(rbo);
+
+ return r;
+}
+
+static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
+{
+ int ret;
+
+ rfb->base.obj[0] = obj;
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+ /* Verify that the modifier is supported. */
+ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
+ mode_cmd->modifier[0])) {
+ drm_dbg_kms(dev,
+ "unsupported pixel format %p4cc / modifier 0x%llx\n",
+ &mode_cmd->pixel_format, mode_cmd->modifier[0]);
+
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+ if (ret)
+ goto err;
+
+ if (drm_drv_uses_atomic_modeset(dev))
+ ret = drm_framebuffer_init(dev, &rfb->base,
+ &amdgpu_fb_funcs_atomic);
+ else
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
+ rfb->base.obj[0] = NULL;
+ return ret;
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int ret, i;
+
+ /*
+ * This needs to happen before modifier conversion as that might change
+ * the number of planes.
+ */
+ for (i = 1; i < rfb->base.format->num_planes; ++i) {
+ if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
+ drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. %u\n",
+ i, mode_cmd->handles[0], mode_cmd->handles[i]);
+ ret = -EINVAL;
+ return ret;
+ }
+ }
+
+ ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface);
+ if (ret)
+ return ret;
+
+ if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) {
+ drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+ if (ret)
+ return ret;
+ }
+
+ if (!dev->mode_config.fb_modifiers_not_supported &&
+ !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
+ ret = convert_tiling_flags_to_modifier(rfb);
+ if (ret) {
+ drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
+ rfb->tiling_flags);
+ return ret;
+ }
+ }
+
+ ret = amdgpu_display_verify_sizes(rfb);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rfb->base.format->num_planes; ++i) {
+ drm_gem_object_get(rfb->base.obj[0]);
+ rfb->base.obj[i] = rfb->base.obj[0];
+ }
+
+ return 0;
+}
+
+struct drm_framebuffer *
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct amdgpu_framebuffer *amdgpu_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ uint32_t domains;
+ int ret;
+
+ obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
+ if (obj == NULL) {
+ drm_dbg_kms(dev,
+ "No GEM object associated to handle 0x%08X, can't create framebuffer\n",
+ mode_cmd->handles[0]);
+
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
+ bo = gem_to_amdgpu_bo(obj);
+ domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
+ if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
+ return ERR_PTR(-EINVAL);
+ }
+
+ amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL);
+ if (amdgpu_fb == NULL) {
+ drm_gem_object_put(obj);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv,
+ mode_cmd, obj);
+ if (ret) {
+ kfree(amdgpu_fb);
+ drm_gem_object_put(obj);
+ return ERR_PTR(ret);
+ }
+
+ drm_gem_object_put(obj);
+ return &amdgpu_fb->base;
+}
+
+const struct drm_mode_config_funcs amdgpu_mode_funcs = {
+ .fb_create = amdgpu_display_user_framebuffer_create,
+};
+
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+ { UNDERSCAN_OFF, "off" },
+ { UNDERSCAN_ON, "on" },
+ { UNDERSCAN_AUTO, "auto" },
+};
+
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+ { AMDGPU_AUDIO_DISABLE, "off" },
+ { AMDGPU_AUDIO_ENABLE, "on" },
+ { AMDGPU_AUDIO_AUTO, "auto" },
+};
+
+/* XXX support different dither options? spatial, temporal, both, etc. */
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+ { AMDGPU_FMT_DITHER_DISABLE, "off" },
+ { AMDGPU_FMT_DITHER_ENABLE, "on" },
+};
+
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
+{
+ int sz;
+
+ adev->mode_info.coherent_mode_property =
+ drm_property_create_range(adev_to_drm(adev), 0, "coherent", 0, 1);
+ if (!adev->mode_info.coherent_mode_property)
+ return -ENOMEM;
+
+ adev->mode_info.load_detect_property =
+ drm_property_create_range(adev_to_drm(adev), 0, "load detection", 0, 1);
+ if (!adev->mode_info.load_detect_property)
+ return -ENOMEM;
+
+ drm_mode_create_scaling_mode_property(adev_to_drm(adev));
+
+ sz = ARRAY_SIZE(amdgpu_underscan_enum_list);
+ adev->mode_info.underscan_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "underscan",
+ amdgpu_underscan_enum_list, sz);
+
+ adev->mode_info.underscan_hborder_property =
+ drm_property_create_range(adev_to_drm(adev), 0,
+ "underscan hborder", 0, 128);
+ if (!adev->mode_info.underscan_hborder_property)
+ return -ENOMEM;
+
+ adev->mode_info.underscan_vborder_property =
+ drm_property_create_range(adev_to_drm(adev), 0,
+ "underscan vborder", 0, 128);
+ if (!adev->mode_info.underscan_vborder_property)
+ return -ENOMEM;
+
+ sz = ARRAY_SIZE(amdgpu_audio_enum_list);
+ adev->mode_info.audio_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "audio",
+ amdgpu_audio_enum_list, sz);
+
+ sz = ARRAY_SIZE(amdgpu_dither_enum_list);
+ adev->mode_info.dither_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "dither",
+ amdgpu_dither_enum_list, sz);
+
+ if (adev->dc_enabled) {
+ adev->mode_info.abm_level_property =
+ drm_property_create_range(adev_to_drm(adev), 0,
+ "abm level", 0, 4);
+ if (!adev->mode_info.abm_level_property)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void amdgpu_display_update_priority(struct amdgpu_device *adev)
+{
+ /* adjustment options for the display watermarks */
+ if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
+ adev->mode_info.disp_priority = 0;
+ else
+ adev->mode_info.disp_priority = amdgpu_disp_priority;
+
+}
+
+static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
+{
+ /* try and guess if this is a tv or a monitor */
+ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
+ (mode->vdisplay == 576) || /* 576p */
+ (mode->vdisplay == 720) || /* 720p */
+ (mode->vdisplay == 1080)) /* 1080p */
+ return true;
+ else
+ return false;
+}
+
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector *connector;
+ u32 src_v = 1, dst_v = 1;
+ u32 src_h = 1, dst_h = 1;
+
+ amdgpu_crtc->h_border = 0;
+ amdgpu_crtc->v_border = 0;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc != crtc)
+ continue;
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ /* set scaling */
+ if (amdgpu_encoder->rmx_type == RMX_OFF)
+ amdgpu_crtc->rmx_type = RMX_OFF;
+ else if (mode->hdisplay < amdgpu_encoder->native_mode.hdisplay ||
+ mode->vdisplay < amdgpu_encoder->native_mode.vdisplay)
+ amdgpu_crtc->rmx_type = amdgpu_encoder->rmx_type;
+ else
+ amdgpu_crtc->rmx_type = RMX_OFF;
+ /* copy native mode */
+ memcpy(&amdgpu_crtc->native_mode,
+ &amdgpu_encoder->native_mode,
+ sizeof(struct drm_display_mode));
+ src_v = crtc->mode.vdisplay;
+ dst_v = amdgpu_crtc->native_mode.vdisplay;
+ src_h = crtc->mode.hdisplay;
+ dst_h = amdgpu_crtc->native_mode.hdisplay;
+
+ /* fix up for overscan on hdmi */
+ if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
+ ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
+ ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
+ connector->display_info.is_hdmi &&
+ amdgpu_display_is_hdtv_mode(mode)))) {
+ if (amdgpu_encoder->underscan_hborder != 0)
+ amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
+ else
+ amdgpu_crtc->h_border = (mode->hdisplay >> 5) + 16;
+ if (amdgpu_encoder->underscan_vborder != 0)
+ amdgpu_crtc->v_border = amdgpu_encoder->underscan_vborder;
+ else
+ amdgpu_crtc->v_border = (mode->vdisplay >> 5) + 16;
+ amdgpu_crtc->rmx_type = RMX_FULL;
+ src_v = crtc->mode.vdisplay;
+ dst_v = crtc->mode.vdisplay - (amdgpu_crtc->v_border * 2);
+ src_h = crtc->mode.hdisplay;
+ dst_h = crtc->mode.hdisplay - (amdgpu_crtc->h_border * 2);
+ }
+ }
+ if (amdgpu_crtc->rmx_type != RMX_OFF) {
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(src_v);
+ b.full = dfixed_const(dst_v);
+ amdgpu_crtc->vsc.full = dfixed_div(a, b);
+ a.full = dfixed_const(src_h);
+ b.full = dfixed_const(dst_h);
+ amdgpu_crtc->hsc.full = dfixed_div(a, b);
+ } else {
+ amdgpu_crtc->vsc.full = dfixed_const(1);
+ amdgpu_crtc->hsc.full = dfixed_const(1);
+ }
+ return true;
+}
+
+/*
+ * Retrieve current video scanout position of crtc on a given gpu, and
+ * an optional accurate timestamp of when query happened.
+ *
+ * \param dev Device to query.
+ * \param pipe Crtc to query.
+ * \param flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * For driver internal use only also supports these flags:
+ *
+ * USE_REAL_VBLANKSTART to use the real start of vblank instead
+ * of a fudged earlier start of vblank.
+ *
+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ * fudged earlier start of vblank in *vpos and the distance
+ * to true start of vblank in *hpos.
+ *
+ * \param *vpos Location where vertical scanout position should be stored.
+ * \param *hpos Location where horizontal scanout position should go.
+ * \param *stime Target location for timestamp taken immediately before
+ * scanout position query. Can be NULL to skip timestamp.
+ * \param *etime Target location for timestamp taken immediately after
+ * scanout position query. Can be NULL to skip timestamp.
+ *
+ * Returns vpos as a positive number while in active scanout area.
+ * Returns vpos as a negative number inside vblank, counting the number
+ * of scanlines to go until end of vblank, e.g., -1 means "one scanline
+ * until start of active scanout / end of vblank."
+ *
+ * \return Flags, or'ed together as follows:
+ *
+ * DRM_SCANOUTPOS_VALID = Query successful.
+ * DRM_SCANOUTPOS_INVBL = Inside vblank.
+ * DRM_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of
+ * this flag means that returned position may be offset by a constant but
+ * unknown small number of scanlines wrt. real scanout position.
+ *
+ */
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
+{
+ u32 vbl = 0, position = 0;
+ int vbl_start, vbl_end, vtotal, ret = 0;
+ bool in_vbl = true;
+
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+ *stime = ktime_get();
+
+ if (amdgpu_display_page_flip_get_scanoutpos(adev, pipe, &vbl, &position) == 0)
+ ret |= DRM_SCANOUTPOS_VALID;
+
+ /* Get optional system timestamp after query. */
+ if (etime)
+ *etime = ktime_get();
+
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Decode into vertical and horizontal scanout position. */
+ *vpos = position & 0x1fff;
+ *hpos = (position >> 16) & 0x1fff;
+
+ /* Valid vblank area boundaries from gpu retrieved? */
+ if (vbl > 0) {
+ /* Yes: Decode. */
+ ret |= DRM_SCANOUTPOS_ACCURATE;
+ vbl_start = vbl & 0x1fff;
+ vbl_end = (vbl >> 16) & 0x1fff;
+ } else {
+ /* No: Fake something reasonable which gives at least ok results. */
+ vbl_start = mode->crtc_vdisplay;
+ vbl_end = 0;
+ }
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
+ }
+
+ /* Fudge vblank to start a few scanlines earlier to handle the
+ * problem that vblank irqs fire a few scanlines before start
+ * of vblank. Some driver internal callers need the true vblank
+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+ *
+ * The cause of the "early" vblank irq is that the irq is triggered
+ * by the line buffer logic when the line buffer read position enters
+ * the vblank, whereas our crtc scanout position naturally lags the
+ * line buffer read position.
+ */
+ if (!(flags & USE_REAL_VBLANKSTART))
+ vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
+ /* Test scanout position against vblank region. */
+ if ((*vpos < vbl_start) && (*vpos >= vbl_end))
+ in_vbl = false;
+
+ /* In vblank? */
+ if (in_vbl)
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from fudged earlier vbl_start */
+ *vpos -= vbl_start;
+ return ret;
+ }
+
+ /* Check if inside vblank area and apply corrective offsets:
+ * vpos will then be >=0 in video scanout area, but negative
+ * within vblank area, counting down the number of lines until
+ * start of scanout.
+ */
+
+ /* Inside "upper part" of vblank area? Apply corrective offset if so: */
+ if (in_vbl && (*vpos >= vbl_start)) {
+ vtotal = mode->crtc_vtotal;
+
+ /* With variable refresh rate displays the vpos can exceed
+ * the vtotal value. Clamp to 0 to return -vbl_end instead
+ * of guessing the remaining number of lines until scanout.
+ */
+ *vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0;
+ }
+
+ /* Correct for shifted end of vbl at vbl_end. */
+ *vpos = *vpos - vbl_end;
+
+ return ret;
+}
+
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
+ return AMDGPU_CRTC_IRQ_NONE;
+
+ switch (crtc) {
+ case 0:
+ return AMDGPU_CRTC_IRQ_VBLANK1;
+ case 1:
+ return AMDGPU_CRTC_IRQ_VBLANK2;
+ case 2:
+ return AMDGPU_CRTC_IRQ_VBLANK3;
+ case 3:
+ return AMDGPU_CRTC_IRQ_VBLANK4;
+ case 4:
+ return AMDGPU_CRTC_IRQ_VBLANK5;
+ case 5:
+ return AMDGPU_CRTC_IRQ_VBLANK6;
+ default:
+ return AMDGPU_CRTC_IRQ_NONE;
+ }
+}
+
+bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
+ bool in_vblank_irq, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+
+ return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+ stime, etime, mode);
+}
+
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_crtc *crtc;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ int r;
+
+ drm_kms_helper_poll_disable(dev);
+
+ /* turn off display hw */
+ drm_modeset_lock_all(dev);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_OFF);
+ drm_connector_list_iter_end(&iter);
+ drm_modeset_unlock_all(dev);
+ /* unpin the front buffers and cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct amdgpu_bo *robj;
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+
+ if (!fb || !fb->obj[0])
+ continue;
+
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
+ }
+ }
+ return 0;
+}
+
+int amdgpu_display_resume_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct drm_crtc *crtc;
+ int r;
+
+ /* pin cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r != 0)
+ dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+ }
+
+ drm_helper_resume_force_mode(dev);
+
+ /* turn on display hw */
+ drm_modeset_lock_all(dev);
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_ON);
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock_all(dev);
+
+ drm_kms_helper_poll_enable(dev);
+
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
new file mode 100644
index 000000000..9d19940f7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DISPLAY_H__
+#define __AMDGPU_DISPLAY_H__
+
+#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
+#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
+#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
+#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
+#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
+#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
+#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
+#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
+#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
+#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
+#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
+
+void amdgpu_display_hotplug_work_func(struct work_struct *work);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags);
+struct drm_framebuffer *
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd);
+const struct drm_format_info *
+amdgpu_lookup_format_info(u32 format, uint64_t modifier);
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
+int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
new file mode 100644
index 000000000..ba3a87cb8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * based on nouveau_prime.c
+ *
+ * Authors: Alex Deucher
+ */
+
+/**
+ * DOC: PRIME Buffer Sharing
+ *
+ * The following callback implementations are used for :ref:`sharing GEM buffer
+ * objects between different devices via PRIME <prime_buffer_sharing>`.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_display.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_xgmi.h"
+#include <drm/amdgpu_drm.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-fence-array.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/pm_runtime.h>
+
+/**
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ attach->peer2peer = false;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0)
+ goto out;
+
+ return 0;
+
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+}
+
+/**
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
+ *
+ * @attach: attachment to pin down
+ *
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
+ */
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ /* pin buffer into GTT */
+ return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+}
+
+/**
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
+ *
+ * @attach: attachment to unpin
+ *
+ * Unpin a previously pinned BO to make it movable again.
+ */
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ amdgpu_bo_unpin(bo);
+}
+
+/**
+ * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
+ * @attach: DMA-buf attachment
+ * @dir: DMA direction
+ *
+ * Makes sure that the shared DMA buffer can be accessed by the target device.
+ * For now, simply pins it to the GTT domain, where it should be accessible by
+ * all DMA devices.
+ *
+ * Returns:
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
+ */
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct dma_buf *dma_buf = attach->dmabuf;
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct sg_table *sgt;
+ long r;
+
+ if (!bo->tbo.pin_count) {
+ /* move buffer into GTT or VRAM */
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ attach->peer2peer) {
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ domains |= AMDGPU_GEM_DOMAIN_VRAM;
+ }
+ amdgpu_bo_placement_from_domain(bo, domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return ERR_PTR(r);
+
+ } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
+ AMDGPU_GEM_DOMAIN_GTT)) {
+ return ERR_PTR(-EBUSY);
+ }
+
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_TT:
+ sgt = drm_prime_pages_to_sg(obj->dev,
+ bo->tbo.ttm->pages,
+ bo->tbo.ttm->num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+ break;
+
+ case TTM_PL_VRAM:
+ r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
+ dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ return sgt;
+
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-EBUSY);
+}
+
+/**
+ * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
+ * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
+ *
+ * This is called when a shared DMA buffer no longer needs to be accessible by
+ * another device. For now, simply unpins the buffer from GTT.
+ */
+static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+ if (sgt->sgl->page_link) {
+ dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+ sg_free_table(sgt);
+ kfree(sgt);
+ } else {
+ amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+ }
+}
+
+/**
+ * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * @dma_buf: Shared DMA buffer
+ * @direction: Direction of DMA transfer
+ *
+ * This is called before CPU access to the shared DMA buffer's memory. If it's
+ * a read access, the buffer is moved to the GTT domain if possible, for optimal
+ * CPU read performance.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { true, false };
+ u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
+ int ret;
+ bool reads = (direction == DMA_BIDIRECTIONAL ||
+ direction == DMA_FROM_DEVICE);
+
+ if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
+ return 0;
+
+ /* move to gtt */
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret != 0))
+ return ret;
+
+ if (!bo->tbo.pin_count &&
+ (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ }
+
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
+
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
+ .attach = amdgpu_dma_buf_attach,
+ .detach = amdgpu_dma_buf_detach,
+ .pin = amdgpu_dma_buf_pin,
+ .unpin = amdgpu_dma_buf_unpin,
+ .map_dma_buf = amdgpu_dma_buf_map,
+ .unmap_dma_buf = amdgpu_dma_buf_unmap,
+ .release = drm_gem_dmabuf_release,
+ .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
+/**
+ * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
+ * @gobj: GEM BO
+ * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
+ *
+ * The main work is done by the &drm_gem_prime_export helper.
+ *
+ * Returns:
+ * Shared DMA buffer representing the GEM BO from the given device.
+ */
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
+ int flags)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct dma_buf *buf;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+ bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ return ERR_PTR(-EPERM);
+
+ buf = drm_gem_prime_export(gobj, flags);
+ if (!IS_ERR(buf))
+ buf->ops = &amdgpu_dmabuf_ops;
+
+ return buf;
+}
+
+/**
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
+ * @dev: DRM device
+ * @dma_buf: DMA-buf
+ *
+ * Creates an empty SG BO for DMA-buf import.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+ struct dma_resv *resv = dma_buf->resv;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ uint64_t flags = 0;
+ int ret;
+
+ dma_resv_lock(resv, NULL);
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
+
+ flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+ }
+
+ ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_CPU, flags,
+ ttm_bo_type_sg, resv, &gobj, 0);
+ if (ret)
+ goto error;
+
+ bo = gem_to_amdgpu_bo(gobj);
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ dma_resv_unlock(resv);
+ return gobj;
+
+error:
+ dma_resv_unlock(resv);
+ return ERR_PTR(ret);
+}
+
+/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that the we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct ttm_placement placement = {};
+ struct amdgpu_vm_bo_base *bo_base;
+ int r;
+
+ if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ return;
+
+ r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+ return;
+ }
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+
+ if (ticket) {
+ /* When we get an error here it means that somebody
+ * else is holding the VM lock and updating page tables
+ * So we can just continue here.
+ */
+ r = dma_resv_lock(resv, ticket);
+ if (r)
+ continue;
+
+ } else {
+ /* TODO: This is more problematic and we actually need
+ * to allow page tables updates without holding the
+ * lock.
+ */
+ if (!dma_resv_trylock(resv))
+ continue;
+ }
+
+ /* Reserve fences for two SDMA page table updates */
+ r = dma_resv_reserve_fences(resv, 2);
+ if (!r)
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (!r)
+ r = amdgpu_vm_handle_moved(adev, vm);
+
+ if (r && r != -EBUSY)
+ DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
+ r);
+
+ dma_resv_unlock(resv);
+ }
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+ .allow_peer2peer = true,
+ .move_notify = amdgpu_dma_buf_move_notify
+};
+
+/**
+ * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
+ * @dev: DRM device
+ * @dma_buf: Shared DMA buffer
+ *
+ * Import a dma_buf into a the driver and potentially create a new GEM object.
+ *
+ * Returns:
+ * GEM BO representing the shared DMA buffer for the given device.
+ */
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attach;
+ struct drm_gem_object *obj;
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ obj = dma_buf->priv;
+ if (obj->dev == dev) {
+ /*
+ * Importing dmabuf exported from out own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ drm_gem_object_get(obj);
+ return obj;
+ }
+ }
+
+ obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+ &amdgpu_dma_buf_attach_ops, obj);
+ if (IS_ERR(attach)) {
+ drm_gem_object_put(obj);
+ return ERR_CAST(attach);
+ }
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
+}
+
+/**
+ * amdgpu_dmabuf_is_xgmi_accessible - Check if xgmi available for P2P transfer
+ *
+ * @adev: amdgpu_device pointer of the importer
+ * @bo: amdgpu buffer object
+ *
+ * Returns:
+ * True if dmabuf accessible over xgmi, false otherwise.
+ */
+bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->tbo.base;
+ struct drm_gem_object *gobj;
+
+ if (obj->import_attach) {
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* No XGMI with non AMD GPUs */
+ return false;
+
+ gobj = dma_buf->priv;
+ bo = gem_to_amdgpu_bo(gobj);
+ }
+
+ if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
+ (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
+ return true;
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000..3e93b9b40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DMA_BUF_H__
+#define __AMDGPU_DMA_BUF_H__
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
+ int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo);
+
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
new file mode 100644
index 000000000..4a8b33f55
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_DOORBELL_H
+#define AMDGPU_DOORBELL_H
+
+/*
+ * GPU doorbell structures, functions & helpers
+ */
+struct amdgpu_doorbell {
+ /* doorbell mmio */
+ resource_size_t base;
+ resource_size_t size;
+
+ /* Number of doorbells reserved for amdgpu kernel driver */
+ u32 num_kernel_doorbells;
+
+ /* Kernel doorbells */
+ struct amdgpu_bo *kernel_doorbells;
+
+ /* For CPU access of doorbells */
+ uint32_t *cpu_addr;
+};
+
+/* Reserved doorbells for amdgpu (including multimedia).
+ * KFD can use all the rest in the 2M doorbell bar.
+ * For asic before vega10, doorbell is 32-bit, so the
+ * index/offset is in dword. For vega10 and after, doorbell
+ * can be 64-bit, so the index defined is in qword.
+ */
+struct amdgpu_doorbell_index {
+ uint32_t kiq;
+ uint32_t mec_ring0;
+ uint32_t mec_ring1;
+ uint32_t mec_ring2;
+ uint32_t mec_ring3;
+ uint32_t mec_ring4;
+ uint32_t mec_ring5;
+ uint32_t mec_ring6;
+ uint32_t mec_ring7;
+ uint32_t userqueue_start;
+ uint32_t userqueue_end;
+ uint32_t gfx_ring0;
+ uint32_t gfx_ring1;
+ uint32_t gfx_userqueue_start;
+ uint32_t gfx_userqueue_end;
+ uint32_t sdma_engine[16];
+ uint32_t mes_ring0;
+ uint32_t mes_ring1;
+ uint32_t ih;
+ union {
+ struct {
+ uint32_t vcn_ring0_1;
+ uint32_t vcn_ring2_3;
+ uint32_t vcn_ring4_5;
+ uint32_t vcn_ring6_7;
+ } vcn;
+ struct {
+ uint32_t uvd_ring0_1;
+ uint32_t uvd_ring2_3;
+ uint32_t uvd_ring4_5;
+ uint32_t uvd_ring6_7;
+ uint32_t vce_ring0_1;
+ uint32_t vce_ring2_3;
+ uint32_t vce_ring4_5;
+ uint32_t vce_ring6_7;
+ } uvd_vce;
+ };
+ uint32_t first_non_cp;
+ uint32_t last_non_cp;
+ uint32_t max_assignment;
+ /* Per engine SDMA doorbell size in dword */
+ uint32_t sdma_doorbell_range;
+ /* Per xcc doorbell size for KIQ/KCQ */
+ uint32_t xcc_doorbell_range;
+};
+
+enum AMDGPU_DOORBELL_ASSIGNMENT {
+ AMDGPU_DOORBELL_KIQ = 0x000,
+ AMDGPU_DOORBELL_HIQ = 0x001,
+ AMDGPU_DOORBELL_DIQ = 0x002,
+ AMDGPU_DOORBELL_MEC_RING0 = 0x010,
+ AMDGPU_DOORBELL_MEC_RING1 = 0x011,
+ AMDGPU_DOORBELL_MEC_RING2 = 0x012,
+ AMDGPU_DOORBELL_MEC_RING3 = 0x013,
+ AMDGPU_DOORBELL_MEC_RING4 = 0x014,
+ AMDGPU_DOORBELL_MEC_RING5 = 0x015,
+ AMDGPU_DOORBELL_MEC_RING6 = 0x016,
+ AMDGPU_DOORBELL_MEC_RING7 = 0x017,
+ AMDGPU_DOORBELL_GFX_RING0 = 0x020,
+ AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
+ AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
+ AMDGPU_DOORBELL_IH = 0x1E8,
+ AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
+ AMDGPU_DOORBELL_INVALID = 0xFFFF
+};
+
+enum AMDGPU_VEGA20_DOORBELL_ASSIGNMENT {
+
+ /* Compute + GFX: 0~255 */
+ AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
+ AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
+ AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
+ AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
+ AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
+ AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,
+ /* SDMA:256~335*/
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
+ /* IH: 376~391 */
+ AMDGPU_VEGA20_DOORBELL_IH = 0x178,
+ /* MMSCH: 392~407
+ * overlap the doorbell assignment with VCN as they are mutually exclusive
+ * VCN engine's doorbell is 32 bit and two VCN ring share one QWORD
+ */
+ AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VNC0 */
+ AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
+ AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
+ AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
+
+ AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VNC1 */
+ AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
+
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B,
+
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
+
+ AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
+ AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
+
+ /* kiq/kcq from second XCD. Max 8 XCDs */
+ AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
+ /* 8 compute rings per GC. Max to 0x1CE */
+ AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
+
+ /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+ AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
+
+ AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
+ AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
+};
+
+enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT {
+
+ /* Compute + GFX: 0~255 */
+ AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
+ AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
+ AMDGPU_NAVI10_DOORBELL_DIQ = 0x002,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
+ AMDGPU_NAVI10_DOORBELL_MES_RING0 = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING1 = 0x00C,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00D,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START = 0x08D,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END = 0x0FF,
+
+ /* SDMA:256~335*/
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E,
+ /* IH: 376~391 */
+ AMDGPU_NAVI10_DOORBELL_IH = 0x178,
+ /* MMSCH: 392~407
+ * overlap the doorbell assignment with VCN as they are mutually exclusive
+ * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+ */
+ AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+ AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189,
+ AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
+ AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
+
+ AMDGPU_NAVI10_DOORBELL64_VCN8_9 = 0x18C,
+ AMDGPU_NAVI10_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+
+ AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
+
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
+};
+
+/*
+ * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
+ */
+enum AMDGPU_DOORBELL64_ASSIGNMENT {
+ /*
+ * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
+ * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
+ * Compute related doorbells are allocated from 0x00 to 0x8a
+ */
+
+
+ /* kernel scheduling */
+ AMDGPU_DOORBELL64_KIQ = 0x00,
+
+ /* HSA interface queue and debug queue */
+ AMDGPU_DOORBELL64_HIQ = 0x01,
+ AMDGPU_DOORBELL64_DIQ = 0x02,
+
+ /* Compute engines */
+ AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
+ AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
+ AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
+ AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
+ AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
+ AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
+ AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
+ AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
+
+ /* User queue doorbell range (128 doorbells) */
+ AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
+ AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
+
+ /* Graphics engine */
+ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
+
+ /*
+ * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
+ * Graphics voltage island aperture 1
+ * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
+ */
+
+ /* For vega10 sriov, the sdma doorbell must be fixed as follow
+ * to keep the same setting with host driver, or it will
+ * happen conflicts
+ */
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
+
+ /* Interrupt handler */
+ AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
+ AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
+ AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
+
+ /* VCN engine use 32 bits doorbell */
+ AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+ AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
+ AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
+ AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
+
+ /* overlap the doorbell assignment with VCN as they are mutually exclusive
+ * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+ */
+ AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
+ AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
+ AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
+ AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
+
+ AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
+ AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
+ AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
+ AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
+
+ AMDGPU_DOORBELL64_FIRST_NON_CP = AMDGPU_DOORBELL64_sDMA_ENGINE0,
+ AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7,
+
+ AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
+ AMDGPU_DOORBELL64_INVALID = 0xFFFF
+};
+
+enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
+
+ /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */
+
+ /* KIQ/HIQ/DIQ */
+ AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
+ AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
+ AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
+ /* Compute: 0x08 ~ 0x20 */
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
+ AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF */
+ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1E8 */
+ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
+
+ AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8,
+ AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
+};
+
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
+
+/*
+ * GPU doorbell aperture helpers function.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
+
+#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
+#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
+#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
+#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
new file mode 100644
index 000000000..3f3662e8b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_mm_rdoorbell - read a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return readl(adev->doorbell.cpu_addr + index);
+
+ DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell - write a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ writel(v, adev->doorbell.cpu_addr + index);
+ else
+ DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index));
+
+ DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - write a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v);
+ else
+ DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @db_bo: doorbell object's bo
+ * @doorbell_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size is in byte
+ *
+ * returns doorbell's absolute index in BAR
+ */
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
+{
+ int db_bo_offset;
+
+ db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
+
+ /* doorbell index is 32 bit but doorbell's size can be 32 bit
+ * or 64 bit, so *db_size(in byte)/4 for alignment.
+ */
+ return db_bo_offset / sizeof(u32) + doorbell_index *
+ DIV_ROUND_UP(db_size, 4);
+}
+
+/**
+ * amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Creates doorbells for graphics driver usages.
+ * returns 0 on success, error otherwise.
+ */
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
+{
+ int r;
+ int size;
+
+ /* SI HW does not have doorbells, skip allocation */
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return 0;
+
+ /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
+ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
+
+ /* Allocate an extra page for MES kernel usages (ring test) */
+ adev->mes.db_start_dw_offset = size / sizeof(u32);
+ size += PAGE_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+ if (r) {
+ DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r);
+ return r;
+ }
+
+ adev->doorbell.num_kernel_doorbells = size / sizeof(u32);
+ return 0;
+}
+
+/*
+ * GPU doorbell aperture helpers function.
+ */
+/**
+ * amdgpu_doorbell_init - Init doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Init doorbell driver information (CIK)
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev)
+{
+
+ /* No doorbell on SI hardware generation */
+ if (adev->asic_type < CHIP_BONAIRE) {
+ adev->doorbell.base = 0;
+ adev->doorbell.size = 0;
+ adev->doorbell.num_kernel_doorbells = 0;
+ return 0;
+ }
+
+ if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
+ return -EINVAL;
+
+ amdgpu_asic_init_doorbell_index(adev);
+
+ /* doorbell bar mapping */
+ adev->doorbell.base = pci_resource_start(adev->pdev, 2);
+ adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+
+ adev->doorbell.num_kernel_doorbells =
+ min_t(u32, adev->doorbell.size / sizeof(u32),
+ adev->doorbell_index.max_assignment + 1);
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return -EINVAL;
+
+ /*
+ * For Vega, reserve and map two pages on doorbell BAR since SDMA
+ * paging queue doorbell use the second page. The
+ * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
+ * doorbells are in the first page. So with paging queue enabled,
+ * the max num_kernel_doorbells should + 1 page (0x400 in dword)
+ */
+ if (adev->asic_type >= CHIP_VEGA10)
+ adev->doorbell.num_kernel_doorbells += 0x400;
+
+ return 0;
+}
+
+/**
+ * amdgpu_doorbell_fini - Tear down doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down doorbell driver information (CIK)
+ */
+void amdgpu_doorbell_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
new file mode 100644
index 000000000..2c35036e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -0,0 +1,2922 @@
+/*
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pciids.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#include <linux/cc_platform.h>
+#include <linux/dynamic_debug.h>
+#include <linux/module.h>
+#include <linux/mmu_notifier.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/vga_switcheroo.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_drv.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_irq.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_xgmi.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+/*
+ * KMS wrapper.
+ * - 3.0.0 - initial driver
+ * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
+ * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
+ * at the end of IBs.
+ * - 3.3.0 - Add VM support for UVD on supported hardware.
+ * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
+ * - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
+ * - 3.7.0 - Add support for VCE clock list packet
+ * - 3.8.0 - Add support raster config init in the kernel
+ * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
+ * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
+ * - 3.12.0 - Add query for double offchip LDS buffers
+ * - 3.13.0 - Add PRT support
+ * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Export more gpu info for gfx9
+ * - 3.16.0 - Add reserved vmid support
+ * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
+ * - 3.18.0 - Export gpu always on cu bitmap
+ * - 3.19.0 - Add support for UVD MJPEG decode
+ * - 3.20.0 - Add support for local BOs
+ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
+ * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
+ * - 3.23.0 - Add query for VRAM lost counter
+ * - 3.24.0 - Add high priority compute support for gfx9
+ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
+ * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
+ * - 3.31.0 - Add support for per-flip tiling attribute changes with DC
+ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
+ * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
+ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
+ * - 3.36.0 - Allow reading more status registers on si/cik
+ * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
+ * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
+ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
+ * - 3.41.0 - Add video codec query
+ * - 3.42.0 - Add 16bpc fixed point display support
+ * - 3.43.0 - Add device hot plug/unplug support
+ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
+ * - 3.45.0 - Add context ioctl stable pstate interface
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.48.0 - Add IP discovery version info to HW INFO
+ * - 3.49.0 - Add gang submit into CS IOCTL
+ * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
+ * Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
+ * 3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ * 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
+ * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
+ * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * 3.53.0 - Support for GFX11 CP GFX shadowing
+ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ */
+#define KMS_DRIVER_MAJOR 3
+#define KMS_DRIVER_MINOR 54
+#define KMS_DRIVER_PATCHLEVEL 0
+
+unsigned int amdgpu_vram_limit = UINT_MAX;
+int amdgpu_vis_vram_limit;
+int amdgpu_gart_size = -1; /* auto */
+int amdgpu_gtt_size = -1; /* auto */
+int amdgpu_moverate = -1; /* auto */
+int amdgpu_audio = -1;
+int amdgpu_disp_priority;
+int amdgpu_hw_i2c;
+int amdgpu_pcie_gen2 = -1;
+int amdgpu_msi = -1;
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
+int amdgpu_dpm = -1;
+int amdgpu_fw_load_type = -1;
+int amdgpu_aspm = -1;
+int amdgpu_runtime_pm = -1;
+uint amdgpu_ip_block_mask = 0xffffffff;
+int amdgpu_bapm = -1;
+int amdgpu_deep_color;
+int amdgpu_vm_size = -1;
+int amdgpu_vm_fragment_size = -1;
+int amdgpu_vm_block_size = -1;
+int amdgpu_vm_fault_stop;
+int amdgpu_vm_debug;
+int amdgpu_vm_update_mode = -1;
+int amdgpu_exp_hw_support;
+int amdgpu_dc = -1;
+int amdgpu_sched_jobs = 32;
+int amdgpu_sched_hw_submission = 2;
+uint amdgpu_pcie_gen_cap;
+uint amdgpu_pcie_lane_cap;
+u64 amdgpu_cg_mask = 0xffffffffffffffff;
+uint amdgpu_pg_mask = 0xffffffff;
+uint amdgpu_sdma_phase_quantum = 32;
+char *amdgpu_disable_cu;
+char *amdgpu_virtual_display;
+bool enforce_isolation;
+/*
+ * OverDrive(bit 14) disabled by default
+ * GFX DCS(bit 19) disabled by default
+ */
+uint amdgpu_pp_feature_mask = 0xfff7bfff;
+uint amdgpu_force_long_training;
+int amdgpu_lbpw = -1;
+int amdgpu_compute_multipipe = -1;
+int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_emu_mode;
+uint amdgpu_smu_memory_pool_size;
+int amdgpu_smu_pptable_id = -1;
+/*
+ * FBC (bit 0) disabled by default
+ * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default
+ * - With this, for multiple monitors in sync(e.g. with the same model),
+ * mclk switching will be allowed. And the mclk will be not foced to the
+ * highest. That helps saving some idle power.
+ * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default
+ * PSR (bit 3) disabled by default
+ * EDP NO POWER SEQUENCING (bit 4) disabled by default
+ */
+uint amdgpu_dc_feature_mask = 2;
+uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_visual_confirm;
+int amdgpu_async_gfx_ring = 1;
+int amdgpu_mcbp = -1;
+int amdgpu_discovery = -1;
+int amdgpu_mes;
+int amdgpu_mes_kiq;
+int amdgpu_noretry = -1;
+int amdgpu_force_asic_type = -1;
+int amdgpu_tmz = -1; /* auto */
+int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
+int amdgpu_smartshift_bias;
+int amdgpu_use_xgmi_p2p = 1;
+int amdgpu_vcnfw_log;
+int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+
+static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
+ "DRM_UT_CORE",
+ "DRM_UT_DRIVER",
+ "DRM_UT_KMS",
+ "DRM_UT_PRIME",
+ "DRM_UT_ATOMIC",
+ "DRM_UT_VBL",
+ "DRM_UT_STATE",
+ "DRM_UT_LEASE",
+ "DRM_UT_DP",
+ "DRM_UT_DRMRES");
+
+struct amdgpu_mgpu_info mgpu_info = {
+ .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
+ .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
+ mgpu_info.delayed_reset_work,
+ amdgpu_drv_delayed_reset_work_handler, 0),
+};
+int amdgpu_ras_enable = -1;
+uint amdgpu_ras_mask = 0xffffffff;
+int amdgpu_bad_page_threshold = -1;
+struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
+ .timeout_fatal_disable = false,
+ .period = 0x0, /* default to 0x0 (timeout disable) */
+};
+
+/**
+ * DOC: vramlimit (int)
+ * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
+ */
+MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
+module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
+
+/**
+ * DOC: vis_vramlimit (int)
+ * Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM).
+ */
+MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
+module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
+
+/**
+ * DOC: gartsize (uint)
+ * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
+ */
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
+module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
+
+/**
+ * DOC: gttsize (int)
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
+ */
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
+module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
+
+/**
+ * DOC: moverate (int)
+ * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s).
+ */
+MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
+module_param_named(moverate, amdgpu_moverate, int, 0600);
+
+/**
+ * DOC: audio (int)
+ * Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it.
+ */
+MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
+module_param_named(audio, amdgpu_audio, int, 0444);
+
+/**
+ * DOC: disp_priority (int)
+ * Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
+ */
+MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");
+module_param_named(disp_priority, amdgpu_disp_priority, int, 0444);
+
+/**
+ * DOC: hw_i2c (int)
+ * To enable hw i2c engine. Only affects non-DC display handling. The default is 0 (Disabled).
+ */
+MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
+module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444);
+
+/**
+ * DOC: pcie_gen2 (int)
+ * To disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");
+module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
+
+/**
+ * DOC: msi (int)
+ * To disable Message Signaled Interrupts (MSI) functionality (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(msi, amdgpu_msi, int, 0444);
+
+/**
+ * DOC: lockup_timeout (string)
+ * Set GPU scheduler timeout value in ms.
+ *
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
+ * multiple values specified. 0 and negative values are invalidated. They will be adjusted
+ * to the default timeout.
+ *
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX.
+ * The second one is for Compute. The third and fourth ones are
+ * for SDMA and Video.
+ *
+ * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
+ * jobs is 10000. The timeout for compute is 60000.
+ */
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
+ "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+
+/**
+ * DOC: dpm (int)
+ * Override for dynamic power management setting
+ * (0 = disable, 1 = enable)
+ * The default is -1 (auto).
+ */
+MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(dpm, amdgpu_dpm, int, 0444);
+
+/**
+ * DOC: fw_load_type (int)
+ * Set different firmware loading type for debugging, if supported.
+ * Set to 0 to force direct loading if supported by the ASIC. Set
+ * to -1 to select the default loading mode for the ASIC, as defined
+ * by the driver. The default is -1 (auto).
+ */
+MODULE_PARM_DESC(fw_load_type, "firmware loading type (3 = rlc backdoor autoload if supported, 2 = smu load if supported, 1 = psp load, 0 = force direct if supported, -1 = auto)");
+module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
+
+/**
+ * DOC: aspm (int)
+ * To disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(aspm, amdgpu_aspm, int, 0444);
+
+/**
+ * DOC: runpm (int)
+ * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
+ * the dGPUs when they are idle if supported. The default is -1 (auto enable).
+ * Setting the value to 0 disables this functionality.
+ * Setting the value to -2 is auto enabled with power down when displays are attached.
+ */
+MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)");
+module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
+
+/**
+ * DOC: ip_block_mask (uint)
+ * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.).
+ * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have
+ * some IPs or may include multiple instances of an IP so the ordering various from asic to asic. See the driver output in
+ * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
+ */
+MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
+module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+
+/**
+ * DOC: bapm (int)
+ * Bidirectional Application Power Management (BAPM) used to dynamically share TDP between CPU and GPU. Set value 0 to disable it.
+ * The default -1 (auto, enabled)
+ */
+MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(bapm, amdgpu_bapm, int, 0444);
+
+/**
+ * DOC: deep_color (int)
+ * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
+module_param_named(deep_color, amdgpu_deep_color, int, 0444);
+
+/**
+ * DOC: vm_size (int)
+ * Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
+module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+
+/**
+ * DOC: vm_fragment_size (int)
+ * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
+module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+
+/**
+ * DOC: vm_block_size (int)
+ * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
+module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
+
+/**
+ * DOC: vm_fault_stop (int)
+ * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop).
+ */
+MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
+module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
+
+/**
+ * DOC: vm_debug (int)
+ * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
+ */
+MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
+module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
+
+/**
+ * DOC: vm_update_mode (int)
+ * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
+ * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
+ */
+MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
+module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+
+/**
+ * DOC: exp_hw_support (int)
+ * Enable experimental hw support (1 = enable). The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
+module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+
+/**
+ * DOC: dc (int)
+ * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");
+module_param_named(dc, amdgpu_dc, int, 0444);
+
+/**
+ * DOC: sched_jobs (int)
+ * Override the max number of jobs supported in the sw queue. The default is 32.
+ */
+MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
+module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
+
+/**
+ * DOC: sched_hw_submission (int)
+ * Override the max number of HW submissions. The default is 2.
+ */
+MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
+module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
+
+/**
+ * DOC: ppfeaturemask (hexint)
+ * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable power features.
+ */
+MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444);
+
+/**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training in resume.
+ * The default is zero, indicates short training in resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
+/**
+ * DOC: pcie_gen_cap (uint)
+ * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
+MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
+module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+
+/**
+ * DOC: pcie_lane_cap (uint)
+ * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
+MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
+/**
+ * DOC: cg_mask (ullong)
+ * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled).
+ */
+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444);
+
+/**
+ * DOC: pg_mask (uint)
+ * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+/**
+ * DOC: sdma_phase_quantum (uint)
+ * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32.
+ */
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
+/**
+ * DOC: disable_cu (charp)
+ * Set to disable CUs (It's set like se.sh.cu,...). The default is NULL.
+ */
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
+/**
+ * DOC: virtual_display (charp)
+ * Set to enable virtual display feature. This feature provides a virtual display hardware on headless boards
+ * or in virtualized environments. It will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x. It's the pci address of
+ * the device, plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci
+ * device at 26:00.0. The default is NULL.
+ */
+MODULE_PARM_DESC(virtual_display,
+ "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
+module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
+
+/**
+ * DOC: lbpw (int)
+ * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(lbpw, amdgpu_lbpw, int, 0444);
+
+MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+
+/**
+ * DOC: gpu_recovery (int)
+ * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
+ */
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+
+/**
+ * DOC: emu_mode (int)
+ * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
+module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+
+/**
+ * DOC: ras_enable (int)
+ * Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))
+ */
+MODULE_PARM_DESC(ras_enable, "Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))");
+module_param_named(ras_enable, amdgpu_ras_enable, int, 0444);
+
+/**
+ * DOC: ras_mask (uint)
+ * Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1
+ * See the flags in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+ */
+MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1");
+module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);
+
+/**
+ * DOC: timeout_fatal_disable (bool)
+ * Disable Watchdog timeout fatal error event
+ */
+MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)");
+module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644);
+
+/**
+ * DOC: timeout_period (uint)
+ * Modify the watchdog timeout max_cycles as (1 << period)
+ */
+MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period)");
+module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
+
+/**
+ * DOC: si_support (int)
+ * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
+ * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
+ * otherwise using amdgpu driver.
+ */
+#ifdef CONFIG_DRM_AMDGPU_SI
+
+#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
+int amdgpu_si_support = 0;
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
+#else
+int amdgpu_si_support = 1;
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
+#endif
+
+module_param_named(si_support, amdgpu_si_support, int, 0444);
+#endif
+
+/**
+ * DOC: cik_support (int)
+ * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. For CIK asic, when radeon driver is enabled,
+ * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
+ * otherwise using amdgpu driver.
+ */
+#ifdef CONFIG_DRM_AMDGPU_CIK
+
+#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
+int amdgpu_cik_support = 0;
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
+#else
+int amdgpu_cik_support = 1;
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
+#endif
+
+module_param_named(cik_support, amdgpu_cik_support, int, 0444);
+#endif
+
+/**
+ * DOC: smu_memory_pool_size (uint)
+ * It is used to reserve gtt for smu debug usage, setting value 0 to disable it. The actual size is value * 256MiB.
+ * E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(smu_memory_pool_size,
+ "reserve gtt for smu debug usage, 0 = disable,0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
+/**
+ * DOC: async_gfx_ring (int)
+ * It is used to enable gfx rings that could be configured with different prioritites or equal priorities
+ */
+MODULE_PARM_DESC(async_gfx_ring,
+ "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))");
+module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
+
+/**
+ * DOC: mcbp (int)
+ * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 auto (default))
+ */
+MODULE_PARM_DESC(mcbp,
+ "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled), -1 = auto (default)");
+module_param_named(mcbp, amdgpu_mcbp, int, 0444);
+
+/**
+ * DOC: discovery (int)
+ * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
+ * (-1 = auto (default), 0 = disabled, 1 = enabled, 2 = use ip_discovery table from file)
+ */
+MODULE_PARM_DESC(discovery,
+ "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
+module_param_named(discovery, amdgpu_discovery, int, 0444);
+
+/**
+ * DOC: mes (int)
+ * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes,
+ "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(mes, amdgpu_mes, int, 0444);
+
+/**
+ * DOC: mes_kiq (int)
+ * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_kiq,
+ "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
+
+/**
+ * DOC: noretry (int)
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
+ * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
+ */
+MODULE_PARM_DESC(noretry,
+ "Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))");
+module_param_named(noretry, amdgpu_noretry, int, 0644);
+
+/**
+ * DOC: force_asic_type (int)
+ * A non negative value used to specify the asic type for all supported GPUs.
+ */
+MODULE_PARM_DESC(force_asic_type,
+ "A non negative value used to specify the asic type for all supported GPUs");
+module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+
+/**
+ * DOC: use_xgmi_p2p (int)
+ * Enables/disables XGMI P2P interface (0 = disable, 1 = enable).
+ */
+MODULE_PARM_DESC(use_xgmi_p2p,
+ "Enable XGMI P2P interface (0 = disable; 1 = enable (default))");
+module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
+
+
+#ifdef CONFIG_HSA_AMD
+/**
+ * DOC: sched_policy (int)
+ * Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
+ * Setting 1 disables over-subscription. Setting 2 disables HWS and statically
+ * assigns queues to HQDs.
+ */
+int sched_policy = KFD_SCHED_POLICY_HWS;
+module_param(sched_policy, int, 0444);
+MODULE_PARM_DESC(sched_policy,
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+
+/**
+ * DOC: hws_max_conc_proc (int)
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
+ * number of VMIDs assigned to the HWS, which is also the default.
+ */
+int hws_max_conc_proc = -1;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+/**
+ * DOC: cwsr_enable (int)
+ * CWSR(compute wave store and resume) allows the GPU to preempt shader execution in
+ * the middle of a compute wave. Default is 1 to enable this feature. Setting 0
+ * disables it.
+ */
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
+/**
+ * DOC: max_num_of_queues_per_device (int)
+ * Maximum number of queues per device. Valid setting is between 1 and 4096. Default
+ * is 4096.
+ */
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+ "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+
+/**
+ * DOC: send_sigterm (int)
+ * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
+ * but just print errors on dmesg. Setting 1 enables sending sigterm.
+ */
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+ "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
+/**
+ * DOC: debug_largebar (int)
+ * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
+ * system. This limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * Default value is 0, diabled.
+ */
+int debug_largebar;
+module_param(debug_largebar, int, 0444);
+MODULE_PARM_DESC(debug_largebar,
+ "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
+
+/**
+ * DOC: halt_if_hws_hang (int)
+ * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
+ * Setting 1 enables halt on hang.
+ */
+int halt_if_hws_hang;
+module_param(halt_if_hws_hang, int, 0644);
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Assume that HWS supports GWS barriers regardless of what firmware version
+ * check says. Default value: false (rely on MEC2 firmware version check).
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
+
+/**
+ * DOC: queue_preemption_timeout_ms (int)
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
+ */
+int queue_preemption_timeout_ms = 9000;
+module_param(queue_preemption_timeout_ms, int, 0644);
+MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+#endif
+
+/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+
+/**
+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+#ifdef CONFIG_HSA_AMD_P2P
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
+#endif
+
+/**
+ * DOC: dcfeaturemask (uint)
+ * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable display features.
+ */
+MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
+module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
+
+/**
+ * DOC: dcdebugmask (uint)
+ * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ */
+MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
+module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+
+MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
+module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
+
+/**
+ * DOC: abmlevel (uint)
+ * Override the default ABM (Adaptive Backlight Management) level used for DC
+ * enabled hardware. Requires DMCU to be supported and loaded.
+ * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by
+ * default. Values 1-4 control the maximum allowable brightness reduction via
+ * the ABM algorithm, with 1 being the least reduction and 4 being the most
+ * reduction.
+ *
+ * Defaults to 0, or disabled. Userspace can still override this level later
+ * after boot.
+ */
+uint amdgpu_dm_abm_level;
+MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
+module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+
+int amdgpu_backlight = -1;
+MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
+module_param_named(backlight, amdgpu_backlight, bint, 0444);
+
+/**
+ * DOC: tmz (int)
+ * Trusted Memory Zone (TMZ) is a method to protect data being written
+ * to or read from memory.
+ *
+ * The default value: 0 (off). TODO: change to auto till it is completed.
+ */
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)");
+module_param_named(tmz, amdgpu_tmz, int, 0444);
+
+/**
+ * DOC: reset_method (int)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
+ */
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
+module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+
+/**
+ * DOC: bad_page_threshold (int) Bad page threshold is specifies the
+ * threshold value of faulty pages detected by RAS ECC, which may
+ * result in the GPU entering bad status when the number of total
+ * faulty pages by ECC exceeds the threshold value.
+ */
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)");
+module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
+
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
+/**
+ * DOC: vcnfw_log (int)
+ * Enable vcnfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
+module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
+
+/**
+ * DOC: sg_display (int)
+ * Disable S/G (scatter/gather) display (i.e., display from system memory).
+ * This option is only relevant on APUs. Set this option to 0 to disable
+ * S/G display if you experience flickering or other issues under memory
+ * pressure and report the issue.
+ */
+MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
+module_param_named(sg_display, amdgpu_sg_display, int, 0444);
+
+/**
+ * DOC: smu_pptable_id (int)
+ * Used to override pptable id. id = 0 use VBIOS pptable.
+ * id > 0 use the soft pptable with specicfied id.
+ */
+MODULE_PARM_DESC(smu_pptable_id,
+ "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
+module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+
+/**
+ * DOC: partition_mode (int)
+ * Used to override the default SPX mode.
+ */
+MODULE_PARM_DESC(
+ user_partt_mode,
+ "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+ 0 = AMDGPU_SPX_PARTITION_MODE, \
+ 1 = AMDGPU_DPX_PARTITION_MODE, \
+ 2 = AMDGPU_TPX_PARTITION_MODE, \
+ 3 = AMDGPU_QPX_PARTITION_MODE, \
+ 4 = AMDGPU_CPX_PARTITION_MODE)");
+module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+
+/**
+ * DOC: enforce_isolation (bool)
+ * enforce process isolation between graphics and compute via using the same reserved vmid.
+ */
+module_param(enforce_isolation, bool, 0444);
+MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+
+/* These devices are not supported by amdgpu.
+ * They are supported by the mach64, r128, radeon drivers
+ */
+static const u16 amdgpu_unsupported_pciidlist[] = {
+ /* mach64 */
+ 0x4354,
+ 0x4358,
+ 0x4554,
+ 0x4742,
+ 0x4744,
+ 0x4749,
+ 0x474C,
+ 0x474D,
+ 0x474E,
+ 0x474F,
+ 0x4750,
+ 0x4751,
+ 0x4752,
+ 0x4753,
+ 0x4754,
+ 0x4755,
+ 0x4756,
+ 0x4757,
+ 0x4758,
+ 0x4759,
+ 0x475A,
+ 0x4C42,
+ 0x4C44,
+ 0x4C47,
+ 0x4C49,
+ 0x4C4D,
+ 0x4C4E,
+ 0x4C50,
+ 0x4C51,
+ 0x4C52,
+ 0x4C53,
+ 0x5654,
+ 0x5655,
+ 0x5656,
+ /* r128 */
+ 0x4c45,
+ 0x4c46,
+ 0x4d46,
+ 0x4d4c,
+ 0x5041,
+ 0x5042,
+ 0x5043,
+ 0x5044,
+ 0x5045,
+ 0x5046,
+ 0x5047,
+ 0x5048,
+ 0x5049,
+ 0x504A,
+ 0x504B,
+ 0x504C,
+ 0x504D,
+ 0x504E,
+ 0x504F,
+ 0x5050,
+ 0x5051,
+ 0x5052,
+ 0x5053,
+ 0x5054,
+ 0x5055,
+ 0x5056,
+ 0x5057,
+ 0x5058,
+ 0x5245,
+ 0x5246,
+ 0x5247,
+ 0x524b,
+ 0x524c,
+ 0x534d,
+ 0x5446,
+ 0x544C,
+ 0x5452,
+ /* radeon */
+ 0x3150,
+ 0x3151,
+ 0x3152,
+ 0x3154,
+ 0x3155,
+ 0x3E50,
+ 0x3E54,
+ 0x4136,
+ 0x4137,
+ 0x4144,
+ 0x4145,
+ 0x4146,
+ 0x4147,
+ 0x4148,
+ 0x4149,
+ 0x414A,
+ 0x414B,
+ 0x4150,
+ 0x4151,
+ 0x4152,
+ 0x4153,
+ 0x4154,
+ 0x4155,
+ 0x4156,
+ 0x4237,
+ 0x4242,
+ 0x4336,
+ 0x4337,
+ 0x4437,
+ 0x4966,
+ 0x4967,
+ 0x4A48,
+ 0x4A49,
+ 0x4A4A,
+ 0x4A4B,
+ 0x4A4C,
+ 0x4A4D,
+ 0x4A4E,
+ 0x4A4F,
+ 0x4A50,
+ 0x4A54,
+ 0x4B48,
+ 0x4B49,
+ 0x4B4A,
+ 0x4B4B,
+ 0x4B4C,
+ 0x4C57,
+ 0x4C58,
+ 0x4C59,
+ 0x4C5A,
+ 0x4C64,
+ 0x4C66,
+ 0x4C67,
+ 0x4E44,
+ 0x4E45,
+ 0x4E46,
+ 0x4E47,
+ 0x4E48,
+ 0x4E49,
+ 0x4E4A,
+ 0x4E4B,
+ 0x4E50,
+ 0x4E51,
+ 0x4E52,
+ 0x4E53,
+ 0x4E54,
+ 0x4E56,
+ 0x5144,
+ 0x5145,
+ 0x5146,
+ 0x5147,
+ 0x5148,
+ 0x514C,
+ 0x514D,
+ 0x5157,
+ 0x5158,
+ 0x5159,
+ 0x515A,
+ 0x515E,
+ 0x5460,
+ 0x5462,
+ 0x5464,
+ 0x5548,
+ 0x5549,
+ 0x554A,
+ 0x554B,
+ 0x554C,
+ 0x554D,
+ 0x554E,
+ 0x554F,
+ 0x5550,
+ 0x5551,
+ 0x5552,
+ 0x5554,
+ 0x564A,
+ 0x564B,
+ 0x564F,
+ 0x5652,
+ 0x5653,
+ 0x5657,
+ 0x5834,
+ 0x5835,
+ 0x5954,
+ 0x5955,
+ 0x5974,
+ 0x5975,
+ 0x5960,
+ 0x5961,
+ 0x5962,
+ 0x5964,
+ 0x5965,
+ 0x5969,
+ 0x5a41,
+ 0x5a42,
+ 0x5a61,
+ 0x5a62,
+ 0x5b60,
+ 0x5b62,
+ 0x5b63,
+ 0x5b64,
+ 0x5b65,
+ 0x5c61,
+ 0x5c63,
+ 0x5d48,
+ 0x5d49,
+ 0x5d4a,
+ 0x5d4c,
+ 0x5d4d,
+ 0x5d4e,
+ 0x5d4f,
+ 0x5d50,
+ 0x5d52,
+ 0x5d57,
+ 0x5e48,
+ 0x5e4a,
+ 0x5e4b,
+ 0x5e4c,
+ 0x5e4d,
+ 0x5e4f,
+ 0x6700,
+ 0x6701,
+ 0x6702,
+ 0x6703,
+ 0x6704,
+ 0x6705,
+ 0x6706,
+ 0x6707,
+ 0x6708,
+ 0x6709,
+ 0x6718,
+ 0x6719,
+ 0x671c,
+ 0x671d,
+ 0x671f,
+ 0x6720,
+ 0x6721,
+ 0x6722,
+ 0x6723,
+ 0x6724,
+ 0x6725,
+ 0x6726,
+ 0x6727,
+ 0x6728,
+ 0x6729,
+ 0x6738,
+ 0x6739,
+ 0x673e,
+ 0x6740,
+ 0x6741,
+ 0x6742,
+ 0x6743,
+ 0x6744,
+ 0x6745,
+ 0x6746,
+ 0x6747,
+ 0x6748,
+ 0x6749,
+ 0x674A,
+ 0x6750,
+ 0x6751,
+ 0x6758,
+ 0x6759,
+ 0x675B,
+ 0x675D,
+ 0x675F,
+ 0x6760,
+ 0x6761,
+ 0x6762,
+ 0x6763,
+ 0x6764,
+ 0x6765,
+ 0x6766,
+ 0x6767,
+ 0x6768,
+ 0x6770,
+ 0x6771,
+ 0x6772,
+ 0x6778,
+ 0x6779,
+ 0x677B,
+ 0x6840,
+ 0x6841,
+ 0x6842,
+ 0x6843,
+ 0x6849,
+ 0x684C,
+ 0x6850,
+ 0x6858,
+ 0x6859,
+ 0x6880,
+ 0x6888,
+ 0x6889,
+ 0x688A,
+ 0x688C,
+ 0x688D,
+ 0x6898,
+ 0x6899,
+ 0x689b,
+ 0x689c,
+ 0x689d,
+ 0x689e,
+ 0x68a0,
+ 0x68a1,
+ 0x68a8,
+ 0x68a9,
+ 0x68b0,
+ 0x68b8,
+ 0x68b9,
+ 0x68ba,
+ 0x68be,
+ 0x68bf,
+ 0x68c0,
+ 0x68c1,
+ 0x68c7,
+ 0x68c8,
+ 0x68c9,
+ 0x68d8,
+ 0x68d9,
+ 0x68da,
+ 0x68de,
+ 0x68e0,
+ 0x68e1,
+ 0x68e4,
+ 0x68e5,
+ 0x68e8,
+ 0x68e9,
+ 0x68f1,
+ 0x68f2,
+ 0x68f8,
+ 0x68f9,
+ 0x68fa,
+ 0x68fe,
+ 0x7100,
+ 0x7101,
+ 0x7102,
+ 0x7103,
+ 0x7104,
+ 0x7105,
+ 0x7106,
+ 0x7108,
+ 0x7109,
+ 0x710A,
+ 0x710B,
+ 0x710C,
+ 0x710E,
+ 0x710F,
+ 0x7140,
+ 0x7141,
+ 0x7142,
+ 0x7143,
+ 0x7144,
+ 0x7145,
+ 0x7146,
+ 0x7147,
+ 0x7149,
+ 0x714A,
+ 0x714B,
+ 0x714C,
+ 0x714D,
+ 0x714E,
+ 0x714F,
+ 0x7151,
+ 0x7152,
+ 0x7153,
+ 0x715E,
+ 0x715F,
+ 0x7180,
+ 0x7181,
+ 0x7183,
+ 0x7186,
+ 0x7187,
+ 0x7188,
+ 0x718A,
+ 0x718B,
+ 0x718C,
+ 0x718D,
+ 0x718F,
+ 0x7193,
+ 0x7196,
+ 0x719B,
+ 0x719F,
+ 0x71C0,
+ 0x71C1,
+ 0x71C2,
+ 0x71C3,
+ 0x71C4,
+ 0x71C5,
+ 0x71C6,
+ 0x71C7,
+ 0x71CD,
+ 0x71CE,
+ 0x71D2,
+ 0x71D4,
+ 0x71D5,
+ 0x71D6,
+ 0x71DA,
+ 0x71DE,
+ 0x7200,
+ 0x7210,
+ 0x7211,
+ 0x7240,
+ 0x7243,
+ 0x7244,
+ 0x7245,
+ 0x7246,
+ 0x7247,
+ 0x7248,
+ 0x7249,
+ 0x724A,
+ 0x724B,
+ 0x724C,
+ 0x724D,
+ 0x724E,
+ 0x724F,
+ 0x7280,
+ 0x7281,
+ 0x7283,
+ 0x7284,
+ 0x7287,
+ 0x7288,
+ 0x7289,
+ 0x728B,
+ 0x728C,
+ 0x7290,
+ 0x7291,
+ 0x7293,
+ 0x7297,
+ 0x7834,
+ 0x7835,
+ 0x791e,
+ 0x791f,
+ 0x793f,
+ 0x7941,
+ 0x7942,
+ 0x796c,
+ 0x796d,
+ 0x796e,
+ 0x796f,
+ 0x9400,
+ 0x9401,
+ 0x9402,
+ 0x9403,
+ 0x9405,
+ 0x940A,
+ 0x940B,
+ 0x940F,
+ 0x94A0,
+ 0x94A1,
+ 0x94A3,
+ 0x94B1,
+ 0x94B3,
+ 0x94B4,
+ 0x94B5,
+ 0x94B9,
+ 0x9440,
+ 0x9441,
+ 0x9442,
+ 0x9443,
+ 0x9444,
+ 0x9446,
+ 0x944A,
+ 0x944B,
+ 0x944C,
+ 0x944E,
+ 0x9450,
+ 0x9452,
+ 0x9456,
+ 0x945A,
+ 0x945B,
+ 0x945E,
+ 0x9460,
+ 0x9462,
+ 0x946A,
+ 0x946B,
+ 0x947A,
+ 0x947B,
+ 0x9480,
+ 0x9487,
+ 0x9488,
+ 0x9489,
+ 0x948A,
+ 0x948F,
+ 0x9490,
+ 0x9491,
+ 0x9495,
+ 0x9498,
+ 0x949C,
+ 0x949E,
+ 0x949F,
+ 0x94C0,
+ 0x94C1,
+ 0x94C3,
+ 0x94C4,
+ 0x94C5,
+ 0x94C6,
+ 0x94C7,
+ 0x94C8,
+ 0x94C9,
+ 0x94CB,
+ 0x94CC,
+ 0x94CD,
+ 0x9500,
+ 0x9501,
+ 0x9504,
+ 0x9505,
+ 0x9506,
+ 0x9507,
+ 0x9508,
+ 0x9509,
+ 0x950F,
+ 0x9511,
+ 0x9515,
+ 0x9517,
+ 0x9519,
+ 0x9540,
+ 0x9541,
+ 0x9542,
+ 0x954E,
+ 0x954F,
+ 0x9552,
+ 0x9553,
+ 0x9555,
+ 0x9557,
+ 0x955f,
+ 0x9580,
+ 0x9581,
+ 0x9583,
+ 0x9586,
+ 0x9587,
+ 0x9588,
+ 0x9589,
+ 0x958A,
+ 0x958B,
+ 0x958C,
+ 0x958D,
+ 0x958E,
+ 0x958F,
+ 0x9590,
+ 0x9591,
+ 0x9593,
+ 0x9595,
+ 0x9596,
+ 0x9597,
+ 0x9598,
+ 0x9599,
+ 0x959B,
+ 0x95C0,
+ 0x95C2,
+ 0x95C4,
+ 0x95C5,
+ 0x95C6,
+ 0x95C7,
+ 0x95C9,
+ 0x95CC,
+ 0x95CD,
+ 0x95CE,
+ 0x95CF,
+ 0x9610,
+ 0x9611,
+ 0x9612,
+ 0x9613,
+ 0x9614,
+ 0x9615,
+ 0x9616,
+ 0x9640,
+ 0x9641,
+ 0x9642,
+ 0x9643,
+ 0x9644,
+ 0x9645,
+ 0x9647,
+ 0x9648,
+ 0x9649,
+ 0x964a,
+ 0x964b,
+ 0x964c,
+ 0x964e,
+ 0x964f,
+ 0x9710,
+ 0x9711,
+ 0x9712,
+ 0x9713,
+ 0x9714,
+ 0x9715,
+ 0x9802,
+ 0x9803,
+ 0x9804,
+ 0x9805,
+ 0x9806,
+ 0x9807,
+ 0x9808,
+ 0x9809,
+ 0x980A,
+ 0x9900,
+ 0x9901,
+ 0x9903,
+ 0x9904,
+ 0x9905,
+ 0x9906,
+ 0x9907,
+ 0x9908,
+ 0x9909,
+ 0x990A,
+ 0x990B,
+ 0x990C,
+ 0x990D,
+ 0x990E,
+ 0x990F,
+ 0x9910,
+ 0x9913,
+ 0x9917,
+ 0x9918,
+ 0x9919,
+ 0x9990,
+ 0x9991,
+ 0x9992,
+ 0x9993,
+ 0x9994,
+ 0x9995,
+ 0x9996,
+ 0x9997,
+ 0x9998,
+ 0x9999,
+ 0x999A,
+ 0x999B,
+ 0x999C,
+ 0x999D,
+ 0x99A0,
+ 0x99A2,
+ 0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b70,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
+};
+
+static const struct pci_device_id pciidlist[] = {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6791, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6792, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6806, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6811, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6816, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6817, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6600, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6631, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6822, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6826, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6663, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6664, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ /* Kaveri */
+ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ /* Bonaire */
+ {0x1002, 0x6640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6650, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6658, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ /* Hawaii */
+ {0x1002, 0x67A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67AA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67BA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67BE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ /* Kabini */
+ {0x1002, 0x9830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9832, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9833, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9836, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x983a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x983c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ /* mullins */
+ {0x1002, 0x9850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9851, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9852, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9853, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9854, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9855, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9856, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9857, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+#endif
+ /* topaz */
+ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ /* tonga */
+ {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6929, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x692B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x692F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6930, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6938, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6939, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ /* fiji */
+ {0x1002, 0x7300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
+ {0x1002, 0x730F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
+ /* carrizo */
+ {0x1002, 0x9870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9874, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ /* stoney */
+ {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU},
+ /* Polaris11 */
+ {0x1002, 0x67E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ /* Polaris10 */
+ {0x1002, 0x67C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67D0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x6FDF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ /* Polaris12 */
+ {0x1002, 0x6980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ /* VEGAM */
+ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ /* Vega 10 */
+ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6869, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ /* Vega 12 */
+ {0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ /* Vega 20 */
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ /* Raven */
+ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ /* Arcturus */
+ {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ /* Navi10 */
+ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ /* Navi14 */
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+
+ /* Renoir */
+ {0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+
+ /* Navi12 */
+ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+ {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+
+ /* Sienna_Cichlid */
+ {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+
+ /* Yellow Carp */
+ {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+ {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+
+ /* Navy_Flounder */
+ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+
+ /* DIMGREY_CAVEFISH */
+ {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+
+ /* Aldebaran */
+ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+
+ /* CYAN_SKILLFISH */
+ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+
+ /* BEIGE_GOBY */
+ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_VGA << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_OTHER << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ {0, 0, 0}
+};
+
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+static const struct drm_driver amdgpu_kms_driver;
+
+static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+ int i;
+
+ /* 0 - GPU
+ * 1 - audio
+ * 2 - USB
+ * 3 - UCSI
+ */
+ for (i = 1; i < 4; i++) {
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, i);
+ if (p) {
+ pm_runtime_get_sync(&p->dev);
+ pm_runtime_mark_last_busy(&p->dev);
+ pm_runtime_put_autosuspend(&p->dev);
+ pci_dev_put(p);
+ }
+ }
+}
+
+static int amdgpu_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct drm_device *ddev;
+ struct amdgpu_device *adev;
+ unsigned long flags = ent->driver_data;
+ int ret, retry = 0, i;
+ bool supports_atomic = false;
+
+ /* skip devices which are owned by radeon */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
+ if (amdgpu_unsupported_pciidlist[i] == pdev->device)
+ return -ENODEV;
+ }
+
+ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
+ amdgpu_aspm = 0;
+
+ if (amdgpu_virtual_display ||
+ amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ supports_atomic = true;
+
+ if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
+ DRM_INFO("This hardware requires experimental hardware support.\n"
+ "See modparam exp_hw_support\n");
+ return -ENODEV;
+ }
+ /* differentiate between P10 and P11 asics with the same DID */
+ if (pdev->device == 0x67FF &&
+ (pdev->revision == 0xE3 ||
+ pdev->revision == 0xE7 ||
+ pdev->revision == 0xF3 ||
+ pdev->revision == 0xF7)) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= CHIP_POLARIS10;
+ }
+
+ /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
+ * however, SME requires an indirect IOMMU mapping because the encryption
+ * bit is beyond the DMA mask of the chip.
+ */
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
+ ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
+ dev_info(&pdev->dev,
+ "SME is not compatible with RAVEN\n");
+ return -ENOTSUPP;
+ }
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+ if (!amdgpu_si_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ dev_info(&pdev->dev,
+ "SI support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ if (!amdgpu_cik_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dev_info(&pdev->dev,
+ "CIK support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+
+ adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
+ if (IS_ERR(adev))
+ return PTR_ERR(adev);
+
+ adev->dev = &pdev->dev;
+ adev->pdev = pdev;
+ ddev = adev_to_drm(adev);
+
+ if (!supports_atomic)
+ ddev->driver_features &= ~DRIVER_ATOMIC;
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_drvdata(pdev, ddev);
+
+ ret = amdgpu_driver_load_kms(adev, flags);
+ if (ret)
+ goto err_pci;
+
+retry_init:
+ ret = drm_dev_register(ddev, flags);
+ if (ret == -EAGAIN && ++retry <= 3) {
+ DRM_INFO("retry init %d\n", retry);
+ /* Don't request EX mode too frequently which is attacking */
+ msleep(5000);
+ goto retry_init;
+ } else if (ret) {
+ goto err_pci;
+ }
+
+ ret = amdgpu_xcp_dev_register(adev, ent);
+ if (ret)
+ goto err_pci;
+
+ /*
+ * 1. don't init fbdev on hw without DCE
+ * 2. don't init fbdev if there are no connectors
+ */
+ if (adev->mode_info.mode_config_initialized &&
+ !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ /* select 8 bpp console on low vram cards */
+ if (adev->gmc.real_vram_size <= (32*1024*1024))
+ drm_fbdev_generic_setup(adev_to_drm(adev), 8);
+ else
+ drm_fbdev_generic_setup(adev_to_drm(adev), 32);
+ }
+
+ ret = amdgpu_debugfs_init(adev);
+ if (ret)
+ DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ /* only need to skip on ATPX */
+ if (amdgpu_device_supports_px(ddev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+ /* we want direct complete for BOCO */
+ if (amdgpu_device_supports_boco(ddev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
+ DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_MAY_SKIP_RESUME);
+ pm_runtime_use_autosuspend(ddev->dev);
+ pm_runtime_set_autosuspend_delay(ddev->dev, 5000);
+
+ pm_runtime_allow(ddev->dev);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ pci_wake_from_d3(pdev, TRUE);
+
+ /*
+ * For runpm implemented via BACO, PMFW will handle the
+ * timing for BACO in and out:
+ * - put ASIC into BACO state only when both video and
+ * audio functions are in D3 state.
+ * - pull ASIC out of BACO state when either video or
+ * audio function is in D0 state.
+ * Also, at startup, PMFW assumes both functions are in
+ * D0 state.
+ *
+ * So if snd driver was loaded prior to amdgpu driver
+ * and audio function was put into D3 state, there will
+ * be no PMFW-aware D-state transition(D0->D3) on runpm
+ * suspend. Thus the BACO will be not correctly kicked in.
+ *
+ * Via amdgpu_get_secondary_funcs(), the audio dev is put
+ * into D0 state. Then there will be a PMFW-aware D-state
+ * transition(D0->D3) on runpm suspend.
+ */
+ if (amdgpu_device_supports_baco(ddev) &&
+ !(adev->flags & AMD_IS_APU) &&
+ (adev->asic_type >= CHIP_NAVI10))
+ amdgpu_get_secondary_funcs(adev);
+ }
+
+ return 0;
+
+err_pci:
+ pci_disable_device(pdev);
+ return ret;
+}
+
+static void
+amdgpu_pci_remove(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ amdgpu_xcp_dev_unplug(adev);
+ drm_dev_unplug(dev);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ pm_runtime_get_sync(dev->dev);
+ pm_runtime_forbid(dev->dev);
+ }
+
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ !amdgpu_sriov_vf(adev)) {
+ bool need_to_reset_gpu = false;
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ struct amdgpu_hive_info *hive;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive->device_remove_count == 0)
+ need_to_reset_gpu = true;
+ hive->device_remove_count++;
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ need_to_reset_gpu = true;
+ }
+
+ /* Workaround for ASICs need to reset SMU.
+ * Called only when the first device is removed.
+ */
+ if (need_to_reset_gpu) {
+ struct amdgpu_reset_context reset_context;
+
+ adev->shutdown = true;
+ memset(&reset_context, 0, sizeof(reset_context));
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+ }
+
+ amdgpu_driver_unload_kms(dev);
+
+ /*
+ * Flush any in flight DMA operations from device.
+ * Clear the Bus Master Enable bit and then wait on the PCIe Device
+ * StatusTransactions Pending bit.
+ */
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+static void
+amdgpu_pci_shutdown(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_ras_intr_triggered())
+ return;
+
+ /* if we are running in a VM, make sure the device
+ * torn down properly on reboot/shutdown.
+ * unfortunately we can't detect certain
+ * hypervisors so just do this all the time.
+ */
+ if (!amdgpu_passthrough(adev))
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+ amdgpu_device_ip_suspend(adev);
+ adev->mp1_state = PP_MP1_STATE_NONE;
+}
+
+/**
+ * amdgpu_drv_delayed_reset_work_handler - work handler for reset
+ *
+ * @work: work_struct.
+ */
+static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
+{
+ struct list_head device_list;
+ struct amdgpu_device *adev;
+ int i, r;
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ mutex_lock(&mgpu_info.mutex);
+ if (mgpu_info.pending_reset == true) {
+ mutex_unlock(&mgpu_info.mutex);
+ return;
+ }
+ mgpu_info.pending_reset = true;
+ mutex_unlock(&mgpu_info.mutex);
+
+ /* Use a common context, just need to make sure full reset is done */
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ reset_context.reset_req_dev = adev;
+ r = amdgpu_device_pre_asic_reset(adev, &reset_context);
+ if (r) {
+ dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
+ r, adev_to_drm(adev)->unique);
+ }
+ if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
+ r = -EALREADY;
+ }
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ flush_work(&adev->xgmi_reset_work);
+ adev->gmc.xgmi.pending_reset = false;
+ }
+
+ /* reset function will rebuild the xgmi hive info , clear it now */
+ for (i = 0; i < mgpu_info.num_dgpu; i++)
+ amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
+
+ INIT_LIST_HEAD(&device_list);
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++)
+ list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);
+
+ /* unregister the GPU first, reset function will add them back */
+ list_for_each_entry(adev, &device_list, reset_list)
+ amdgpu_unregister_gpu_instance(adev);
+
+ /* Use a common context, just need to make sure full reset is done */
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ r = amdgpu_do_asic_reset(&device_list, &reset_context);
+
+ if (r) {
+ DRM_ERROR("reinit gpus failure");
+ return;
+ }
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+}
+
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+ if (amdgpu_device_supports_boco(drm_dev))
+ return pm_runtime_suspended(dev);
+
+ /* if we will not support s3 or s2i for the device
+ * then skip suspend
+ */
+ if (!amdgpu_acpi_is_s0ix_active(adev) &&
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
+
+ return 0;
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+ /* nothing to do */
+}
+
+static int amdgpu_pmops_suspend(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = true;
+ else if (amdgpu_acpi_is_s3_active(adev))
+ adev->in_s3 = true;
+ if (!adev->in_s0ix && !adev->in_s3)
+ return 0;
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+
+ return 0;
+}
+
+static int amdgpu_pmops_resume(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ if (!adev->in_s0ix && !adev->in_s3)
+ return 0;
+
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ r = amdgpu_device_resume(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = false;
+ else
+ adev->in_s3 = false;
+ return r;
+}
+
+static int amdgpu_pmops_freeze(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ adev->in_s4 = true;
+ r = amdgpu_device_suspend(drm_dev, true);
+ adev->in_s4 = false;
+ if (r)
+ return r;
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+ return 0;
+}
+
+static int amdgpu_pmops_thaw(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ return amdgpu_device_resume(drm_dev, true);
+}
+
+static int amdgpu_pmops_poweroff(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_restore(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ return amdgpu_device_resume(drm_dev, true);
+}
+
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ if (amdgpu_runtime_pm != -2) {
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ if (adev->dc_enabled) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_pmops_runtime_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int ret, i;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
+ pm_runtime_forbid(dev);
+ return -EBUSY;
+ }
+
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+
+ /* wait for all rings to drain before suspending */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring && ring->sched.ready) {
+ ret = amdgpu_fence_wait_empty(ring);
+ if (ret)
+ return -EBUSY;
+ }
+ }
+
+ adev->in_runpm = true;
+ if (amdgpu_device_supports_px(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ /*
+ * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
+ * proper cleanups and put itself into a state ready for PNP. That
+ * can address some random resuming failure observed on BOCO capable
+ * platforms.
+ * TODO: this may be also needed for PX capable platform.
+ */
+ if (amdgpu_device_supports_boco(drm_dev))
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (ret) {
+ adev->in_runpm = false;
+ if (amdgpu_device_supports_boco(drm_dev))
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ return ret;
+ }
+
+ if (amdgpu_device_supports_boco(drm_dev))
+ adev->mp1_state = PP_MP1_STATE_NONE;
+
+ if (amdgpu_device_supports_px(drm_dev)) {
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ amdgpu_device_cache_pci_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ } else if (amdgpu_device_supports_boco(drm_dev)) {
+ /* nothing to do */
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_enter(drm_dev);
+ }
+
+ dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
+ return 0;
+}
+
+static int amdgpu_pmops_runtime_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int ret;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ return -EINVAL;
+
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ if (amdgpu_device_supports_px(drm_dev)) {
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+ pci_set_master(pdev);
+ } else if (amdgpu_device_supports_boco(drm_dev)) {
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ pci_set_master(pdev);
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_exit(drm_dev);
+ }
+ ret = amdgpu_device_resume(drm_dev, false);
+ if (ret) {
+ if (amdgpu_device_supports_px(drm_dev))
+ pci_disable_device(pdev);
+ return ret;
+ }
+
+ if (amdgpu_device_supports_px(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ adev->in_runpm = false;
+ return 0;
+}
+
+static int amdgpu_pmops_runtime_idle(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
+ int ret = 1;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
+ pm_runtime_forbid(dev);
+ return -EBUSY;
+ }
+
+ ret = amdgpu_runtime_idle_check_display(dev);
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_autosuspend(dev);
+ return ret;
+}
+
+long amdgpu_drm_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct drm_device *dev;
+ long ret;
+
+ dev = file_priv->minor->dev;
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0)
+ goto out;
+
+ ret = drm_ioctl(filp, cmd, arg);
+
+ pm_runtime_mark_last_busy(dev->dev);
+out:
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+}
+
+static const struct dev_pm_ops amdgpu_pm_ops = {
+ .prepare = amdgpu_pmops_prepare,
+ .complete = amdgpu_pmops_complete,
+ .suspend = amdgpu_pmops_suspend,
+ .suspend_noirq = amdgpu_pmops_suspend_noirq,
+ .resume = amdgpu_pmops_resume,
+ .freeze = amdgpu_pmops_freeze,
+ .thaw = amdgpu_pmops_thaw,
+ .poweroff = amdgpu_pmops_poweroff,
+ .restore = amdgpu_pmops_restore,
+ .runtime_suspend = amdgpu_pmops_runtime_suspend,
+ .runtime_resume = amdgpu_pmops_runtime_resume,
+ .runtime_idle = amdgpu_pmops_runtime_idle,
+};
+
+static int amdgpu_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *file_priv = f->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+
+ timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout);
+ timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
+
+ return timeout >= 0 ? 0 : timeout;
+}
+
+static const struct file_operations amdgpu_driver_kms_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .flush = amdgpu_flush,
+ .release = drm_release,
+ .unlocked_ioctl = amdgpu_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = amdgpu_kms_compat_ioctl,
+#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+};
+
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+ struct drm_file *file;
+
+ if (!filp)
+ return -EINVAL;
+
+ if (filp->f_op != &amdgpu_driver_kms_fops)
+ return -EINVAL;
+
+ file = filp->private_data;
+ *fpriv = file->driver_priv;
+ return 0;
+}
+
+const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ /* KMS */
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+};
+
+static const struct drm_driver amdgpu_kms_driver = {
+ .driver_features =
+ DRIVER_ATOMIC |
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .lastclose = amdgpu_driver_lastclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo,
+#endif
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+const struct drm_driver amdgpu_partition_driver = {
+ .driver_features =
+ DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .lastclose = amdgpu_driver_lastclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+static struct pci_error_handlers amdgpu_pci_err_handler = {
+ .error_detected = amdgpu_pci_error_detected,
+ .mmio_enabled = amdgpu_pci_mmio_enabled,
+ .slot_reset = amdgpu_pci_slot_reset,
+ .resume = amdgpu_pci_resume,
+};
+
+static const struct attribute_group *amdgpu_sysfs_groups[] = {
+ &amdgpu_vram_mgr_attr_group,
+ &amdgpu_gtt_mgr_attr_group,
+ &amdgpu_flash_attr_group,
+ NULL,
+};
+
+static struct pci_driver amdgpu_kms_pci_driver = {
+ .name = DRIVER_NAME,
+ .id_table = pciidlist,
+ .probe = amdgpu_pci_probe,
+ .remove = amdgpu_pci_remove,
+ .shutdown = amdgpu_pci_shutdown,
+ .driver.pm = &amdgpu_pm_ops,
+ .err_handler = &amdgpu_pci_err_handler,
+ .dev_groups = amdgpu_sysfs_groups,
+};
+
+static int __init amdgpu_init(void)
+{
+ int r;
+
+ if (drm_firmware_drivers_only())
+ return -EINVAL;
+
+ r = amdgpu_sync_init();
+ if (r)
+ goto error_sync;
+
+ r = amdgpu_fence_slab_init();
+ if (r)
+ goto error_fence;
+
+ DRM_INFO("amdgpu kernel modesetting enabled.\n");
+ amdgpu_register_atpx_handler();
+ amdgpu_acpi_detect();
+
+ /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+ amdgpu_amdkfd_init();
+
+ /* let modprobe override vga console setting */
+ return pci_register_driver(&amdgpu_kms_pci_driver);
+
+error_fence:
+ amdgpu_sync_fini();
+
+error_sync:
+ return r;
+}
+
+static void __exit amdgpu_exit(void)
+{
+ amdgpu_amdkfd_fini();
+ pci_unregister_driver(&amdgpu_kms_pci_driver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_acpi_release();
+ amdgpu_sync_fini();
+ amdgpu_fence_slab_fini();
+ mmu_notifier_synchronize();
+ amdgpu_xcp_drv_release();
+}
+
+module_init(amdgpu_init);
+module_exit(amdgpu_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
new file mode 100644
index 000000000..5bc2cb661
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -0,0 +1,53 @@
+/* amdgpu_drv.h -- Private header for amdgpu driver -*- linux-c -*-
+ *
+ * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DRV_H__
+#define __AMDGPU_DRV_H__
+
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include "amd_shared.h"
+
+/* General customization:
+ */
+
+#define DRIVER_AUTHOR "AMD linux driver team"
+
+#define DRIVER_NAME "amdgpu"
+#define DRIVER_DESC "AMD GPU"
+#define DRIVER_DATE "20150101"
+
+extern const struct drm_driver amdgpu_partition_driver;
+
+long amdgpu_drm_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
+long amdgpu_kms_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
new file mode 100644
index 000000000..e71768661
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_eeprom.h"
+#include "amdgpu.h"
+
+/* AT24CM02 and M24M02-R have a 256-byte write page size.
+ */
+#define EEPROM_PAGE_BITS 8
+#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS)
+#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1)
+
+#define EEPROM_OFFSET_SIZE 2
+
+/* EEPROM memory addresses are 19-bits long, which can
+ * be partitioned into 3, 8, 8 bits, for a total of 19.
+ * The upper 3 bits are sent as part of the 7-bit
+ * "Device Type Identifier"--an I2C concept, which for EEPROM devices
+ * is hard-coded as 1010b, indicating that it is an EEPROM
+ * device--this is the wire format, followed by the upper
+ * 3 bits of the 19-bit address, followed by the direction,
+ * followed by two bytes holding the rest of the 16-bits of
+ * the EEPROM memory address. The format on the wire for EEPROM
+ * devices is: 1010XYZD, A15:A8, A7:A0,
+ * Where D is the direction and sequenced out by the hardware.
+ * Bits XYZ are memory address bits 18, 17 and 16.
+ * These bits are compared to how pins 1-3 of the part are connected,
+ * depending on the size of the part, more on that later.
+ *
+ * Note that of this wire format, a client is in control
+ * of, and needs to specify only XYZ, A15:A8, A7:0, bits,
+ * which is exactly the EEPROM memory address, or offset,
+ * in order to address up to 8 EEPROM devices on the I2C bus.
+ *
+ * For instance, a 2-Mbit I2C EEPROM part, addresses all its bytes,
+ * using an 18-bit address, bit 17 to 0 and thus would use all but one bit of
+ * the 19 bits previously mentioned. The designer would then not connect
+ * pins 1 and 2, and pin 3 usually named "A_2" or "E2", would be connected to
+ * either Vcc or GND. This would allow for up to two 2-Mbit parts on
+ * the same bus, where one would be addressable with bit 18 as 1, and
+ * the other with bit 18 of the address as 0.
+ *
+ * For a 2-Mbit part, bit 18 is usually known as the "Chip Enable" or
+ * "Hardware Address Bit". This bit is compared to the load on pin 3
+ * of the device, described above, and if there is a match, then this
+ * device responds to the command. This way, you can connect two
+ * 2-Mbit EEPROM devices on the same bus, but see one contiguous
+ * memory from 0 to 7FFFFh, where address 0 to 3FFFF is in the device
+ * whose pin 3 is connected to GND, and address 40000 to 7FFFFh is in
+ * the 2nd device, whose pin 3 is connected to Vcc.
+ *
+ * This addressing you encode in the 32-bit "eeprom_addr" below,
+ * namely the 19-bits "XYZ,A15:A0", as a single 19-bit address. For
+ * instance, eeprom_addr = 0x6DA01, is 110_1101_1010_0000_0001, where
+ * XYZ=110b, and A15:A0=DA01h. The XYZ bits become part of the device
+ * address, and the rest of the address bits are sent as the memory
+ * address bytes.
+ *
+ * That is, for an I2C EEPROM driver everything is controlled by
+ * the "eeprom_addr".
+ *
+ * See also top of amdgpu_ras_eeprom.c.
+ *
+ * P.S. If you need to write, lock and read the Identification Page,
+ * (M24M02-DR device only, which we do not use), change the "7" to
+ * "0xF" in the macro below, and let the client set bit 20 to 1 in
+ * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to
+ * 1 to lock it permanently.
+ */
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
+
+static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u16 buf_size, bool read)
+{
+ u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
+ struct i2c_msg msgs[] = {
+ {
+ .flags = 0,
+ .len = EEPROM_OFFSET_SIZE,
+ .buf = eeprom_offset_buf,
+ },
+ {
+ .flags = read ? I2C_M_RD : 0,
+ },
+ };
+ const u8 *p = eeprom_buf;
+ int r;
+ u16 len;
+
+ for (r = 0; buf_size > 0;
+ buf_size -= len, eeprom_addr += len, eeprom_buf += len) {
+ /* Set the EEPROM address we want to write to/read from.
+ */
+ msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr);
+ msgs[1].addr = msgs[0].addr;
+ msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff;
+ msgs[0].buf[1] = eeprom_addr & 0xff;
+
+ if (!read) {
+ /* Write the maximum amount of data, without
+ * crossing the device's page boundary, as per
+ * its spec. Partial page writes are allowed,
+ * starting at any location within the page,
+ * so long as the page boundary isn't crossed
+ * over (actually the page pointer rolls
+ * over).
+ *
+ * As per the AT24CM02 EEPROM spec, after
+ * writing into a page, the I2C driver should
+ * terminate the transfer, i.e. in
+ * "i2c_transfer()" below, with a STOP
+ * condition, so that the self-timed write
+ * cycle begins. This is implied for the
+ * "i2c_transfer()" abstraction.
+ */
+ len = min(EEPROM_PAGE_SIZE - (eeprom_addr &
+ EEPROM_PAGE_MASK),
+ (u32)buf_size);
+ } else {
+ /* Reading from the EEPROM has no limitation
+ * on the number of bytes read from the EEPROM
+ * device--they are simply sequenced out.
+ */
+ len = buf_size;
+ }
+ msgs[1].len = len;
+ msgs[1].buf = eeprom_buf;
+
+ /* This constitutes a START-STOP transaction.
+ */
+ r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs));
+ if (r != ARRAY_SIZE(msgs))
+ break;
+
+ if (!read) {
+ /* According to EEPROM specs the length of the
+ * self-writing cycle, tWR (tW), is 10 ms.
+ *
+ * TODO: Use polling on ACK, aka Acknowledge
+ * Polling, to minimize waiting for the
+ * internal write cycle to complete, as it is
+ * usually smaller than tWR (tW).
+ */
+ msleep(10);
+ }
+ }
+
+ return r < 0 ? r : eeprom_buf - p;
+}
+
+/**
+ * amdgpu_eeprom_xfer -- Read/write from/to an I2C EEPROM device
+ * @i2c_adap: pointer to the I2C adapter to use
+ * @eeprom_addr: EEPROM address from which to read/write
+ * @eeprom_buf: pointer to data buffer to read into/write from
+ * @buf_size: the size of @eeprom_buf
+ * @read: True if reading from the EEPROM, false if writing
+ *
+ * Returns the number of bytes read/written; -errno on error.
+ */
+static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u16 buf_size, bool read)
+{
+ const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
+ u16 limit;
+ u16 ps; /* Partial size */
+ int res = 0, r;
+
+ if (!quirks)
+ limit = 0;
+ else if (read)
+ limit = quirks->max_read_len;
+ else
+ limit = quirks->max_write_len;
+
+ if (limit == 0) {
+ return __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, buf_size, read);
+ } else if (limit <= EEPROM_OFFSET_SIZE) {
+ dev_err_ratelimited(&i2c_adap->dev,
+ "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
+ eeprom_addr, buf_size,
+ read ? "read" : "write", EEPROM_OFFSET_SIZE);
+ return -EINVAL;
+ }
+
+ /* The "limit" includes all data bytes sent/received,
+ * which would include the EEPROM_OFFSET_SIZE bytes.
+ * Account for them here.
+ */
+ limit -= EEPROM_OFFSET_SIZE;
+ for ( ; buf_size > 0;
+ buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
+ ps = min(limit, buf_size);
+
+ r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, ps, read);
+ if (r < 0)
+ return r;
+ res += r;
+ }
+
+ return res;
+}
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u16 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ true);
+}
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u16 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ false);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
new file mode 100644
index 000000000..6935adb2b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_EEPROM_H
+#define _AMDGPU_EEPROM_H
+
+#include <linux/i2c.h>
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u16 bytes);
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u16 bytes);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
new file mode 100644
index 000000000..3aaeed2d3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+
+void
+amdgpu_link_encoder_connector(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ /* walk the list and link encoders to connectors */
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ drm_connector_attach_encoder(connector, encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);
+ adev->mode_info.bl_encoder = amdgpu_encoder;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
+ DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
+ amdgpu_encoder->active_device, amdgpu_encoder->devices,
+ amdgpu_connector->devices, encoder->encoder_type);
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_encoder->active_device & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return found;
+}
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return found;
+}
+
+struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *other_encoder;
+ struct amdgpu_encoder *other_amdgpu_encoder;
+
+ if (amdgpu_encoder->is_ext_encoder)
+ return NULL;
+
+ list_for_each_entry(other_encoder, &dev->mode_config.encoder_list, head) {
+ if (other_encoder == encoder)
+ continue;
+ other_amdgpu_encoder = to_amdgpu_encoder(other_encoder);
+ if (other_amdgpu_encoder->is_ext_encoder &&
+ (amdgpu_encoder->devices & other_amdgpu_encoder->devices))
+ return other_encoder;
+ }
+ return NULL;
+}
+
+u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder)
+{
+ struct drm_encoder *other_encoder = amdgpu_get_external_encoder(encoder);
+
+ if (other_encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(other_encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ return amdgpu_encoder->encoder_id;
+ default:
+ return ENCODER_OBJECT_ID_NONE;
+ }
+ }
+ return ENCODER_OBJECT_ID_NONE;
+}
+
+void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+ unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+ unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+ unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+ unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+ unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+
+ adjusted_mode->clock = native_mode->clock;
+ adjusted_mode->flags = native_mode->flags;
+
+ adjusted_mode->hdisplay = native_mode->hdisplay;
+ adjusted_mode->vdisplay = native_mode->vdisplay;
+
+ adjusted_mode->htotal = native_mode->hdisplay + hblank;
+ adjusted_mode->hsync_start = native_mode->hdisplay + hover;
+ adjusted_mode->hsync_end = adjusted_mode->hsync_start + hsync_width;
+
+ adjusted_mode->vtotal = native_mode->vdisplay + vblank;
+ adjusted_mode->vsync_start = native_mode->vdisplay + vover;
+ adjusted_mode->vsync_end = adjusted_mode->vsync_start + vsync_width;
+
+ drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+
+ adjusted_mode->crtc_hdisplay = native_mode->hdisplay;
+ adjusted_mode->crtc_vdisplay = native_mode->vdisplay;
+
+ adjusted_mode->crtc_htotal = adjusted_mode->crtc_hdisplay + hblank;
+ adjusted_mode->crtc_hsync_start = adjusted_mode->crtc_hdisplay + hover;
+ adjusted_mode->crtc_hsync_end = adjusted_mode->crtc_hsync_start + hsync_width;
+
+ adjusted_mode->crtc_vtotal = adjusted_mode->crtc_vdisplay + vblank;
+ adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + vover;
+ adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + vsync_width;
+
+}
+
+bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
+ u32 pixel_clock)
+{
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ connector = amdgpu_get_connector_for_encoder(encoder);
+ /* if we don't have an active device yet, just use one of
+ * the connectors tied to the encoder.
+ */
+ if (!connector)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ amdgpu_connector = to_amdgpu_connector(connector);
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ if (amdgpu_connector->use_digital) {
+ /* HDMI 1.3 supports up to 340 Mhz over single link */
+ if (connector->display_info.is_hdmi) {
+ if (pixel_clock > 340000)
+ return true;
+ else
+ return false;
+ } else {
+ if (pixel_clock > 165000)
+ return true;
+ else
+ return false;
+ }
+ } else
+ return false;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
+ return false;
+ else {
+ /* HDMI 1.3 supports up to 340 Mhz over single link */
+ if (connector->display_info.is_hdmi) {
+ if (pixel_clock > 340000)
+ return true;
+ else
+ return false;
+ } else {
+ if (pixel_clock > 165000)
+ return true;
+ else
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
new file mode 100644
index 000000000..6038b5021
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: MIT
+/* Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ctx.h"
+#include "amdgpu_fdinfo.h"
+
+
+static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = "gfx",
+ [AMDGPU_HW_IP_COMPUTE] = "compute",
+ [AMDGPU_HW_IP_DMA] = "dma",
+ [AMDGPU_HW_IP_UVD] = "dec",
+ [AMDGPU_HW_IP_VCE] = "enc",
+ [AMDGPU_HW_IP_UVD_ENC] = "enc_1",
+ [AMDGPU_HW_IP_VCN_DEC] = "dec",
+ [AMDGPU_HW_IP_VCN_ENC] = "enc",
+ [AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+};
+
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_mem_stats stats;
+ ktime_t usage[AMDGPU_HW_IP_NUM];
+ uint32_t bus, dev, fn, domain;
+ unsigned int hw_ip;
+ int ret;
+
+ memset(&stats, 0, sizeof(stats));
+ bus = adev->pdev->bus->number;
+ domain = pci_domain_nr(adev->pdev->bus);
+ dev = PCI_SLOT(adev->pdev->devfn);
+ fn = PCI_FUNC(adev->pdev->devfn);
+
+ ret = amdgpu_bo_reserve(vm->root.bo, false);
+ if (ret)
+ return;
+
+ amdgpu_vm_get_memory(vm, &stats);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
+
+ /*
+ * ******************************************************************
+ * For text output format description please see drm-usage-stats.rst!
+ * ******************************************************************
+ */
+
+ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
+ drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name);
+ drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
+ drm_printf(p, "drm-client-id:\t%llu\n", vm->immediate.fence_context);
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
+ drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
+ stats.visible_vram/1024UL);
+ drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
+ stats.evicted_vram/1024UL);
+ drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
+ stats.evicted_visible_vram/1024UL);
+ drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
+ stats.requested_vram/1024UL);
+ drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
+ stats.requested_visible_vram/1024UL);
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
+ stats.requested_gtt/1024UL);
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ if (!usage[hw_ip])
+ continue;
+
+ drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip],
+ ktime_to_ns(usage[hw_ip]));
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
new file mode 100644
index 000000000..0398f5a15
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+#ifndef __AMDGPU_SMI_H__
+#define __AMDGPU_SMI_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+
+uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
new file mode 100644
index 000000000..7537f5aa7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Dave Airlie
+ */
+#include <linux/seq_file.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+};
+
+static struct kmem_cache *amdgpu_fence_slab;
+
+int amdgpu_fence_slab_init(void)
+{
+ amdgpu_fence_slab = kmem_cache_create(
+ "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_fence_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void amdgpu_fence_slab_fini(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_fence_slab);
+}
+/*
+ * Cast helper
+ */
+static const struct dma_fence_ops amdgpu_fence_ops;
+static const struct dma_fence_ops amdgpu_job_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ if (__f->base.ops == &amdgpu_fence_ops ||
+ __f->base.ops == &amdgpu_job_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
+/**
+ * amdgpu_fence_write - write a fence value
+ *
+ * @ring: ring the fence is associated with
+ * @seq: sequence number to write
+ *
+ * Writes a fence value to memory (all asics).
+ */
+static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+
+ if (drv->cpu_addr)
+ *drv->cpu_addr = cpu_to_le32(seq);
+}
+
+/**
+ * amdgpu_fence_read - read a fence value
+ *
+ * @ring: ring the fence is associated with
+ *
+ * Reads a fence value from memory (all asics).
+ * Returns the value of the fence read from memory.
+ */
+static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ u32 seq = 0;
+
+ if (drv->cpu_addr)
+ seq = le32_to_cpu(*drv->cpu_addr);
+ else
+ seq = atomic_read(&drv->last_seq);
+
+ return seq;
+}
+
+/**
+ * amdgpu_fence_emit - emit a fence on the requested ring
+ *
+ * @ring: ring the fence is associated with
+ * @f: resulting fence object
+ * @job: job the fence is embedded in
+ * @flags: flags to pass into the subordinate .emit_fence() call
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
+ unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence;
+ struct amdgpu_fence *am_fence;
+ struct dma_fence __rcu **ptr;
+ uint32_t seq;
+ int r;
+
+ if (job == NULL) {
+ /* create a sperate hw fence */
+ am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
+ if (am_fence == NULL)
+ return -ENOMEM;
+ fence = &am_fence->base;
+ am_fence->ring = ring;
+ } else {
+ /* take use of job-embedded fence */
+ fence = &job->hw_fence;
+ }
+
+ seq = ++ring->fence_drv.sync_seq;
+ if (job && job->job_run_counter) {
+ /* reinit seq for resubmitted jobs */
+ fence->seqno = seq;
+ /* TO be inline with external fence creation and other drivers */
+ dma_fence_get(fence);
+ } else {
+ if (job) {
+ dma_fence_init(fence, &amdgpu_job_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
+ /* Against remove in amdgpu_job_{free, free_cb} */
+ dma_fence_get(fence);
+ } else {
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
+ }
+ }
+
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ seq, flags | AMDGPU_FENCE_FLAG_INT);
+ pm_runtime_get_noresume(adev_to_drm(adev)->dev);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ if (unlikely(rcu_dereference_protected(*ptr, 1))) {
+ struct dma_fence *old;
+
+ rcu_read_lock();
+ old = dma_fence_get_rcu_safe(ptr);
+ rcu_read_unlock();
+
+ if (old) {
+ r = dma_fence_wait(old, false);
+ dma_fence_put(old);
+ if (r)
+ return r;
+ }
+ }
+
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ rcu_assign_pointer(*ptr, dma_fence_get(fence));
+
+ *f = fence;
+
+ return 0;
+}
+
+/**
+ * amdgpu_fence_emit_polling - emit a fence on the requeste ring
+ *
+ * @ring: ring the fence is associated with
+ * @s: resulting sequence number
+ * @timeout: the timeout for waiting in usecs
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Used For polling fence.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
+{
+ uint32_t seq;
+ signed long r;
+
+ if (!s)
+ return -EINVAL;
+
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ return -ETIMEDOUT;
+
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ seq, 0);
+
+ *s = seq;
+
+ return 0;
+}
+
+/**
+ * amdgpu_fence_schedule_fallback - schedule fallback check
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as fallback to our interrupts.
+ */
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+{
+ mod_timer(&ring->fence_drv.fallback_timer,
+ jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
+ * amdgpu_fence_process - check for fence activity
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Checks the current fence value and calculates the last
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
+ *
+ * Returns true if fence was processed
+ */
+bool amdgpu_fence_process(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t seq, last_seq;
+
+ do {
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ seq = amdgpu_fence_read(ring);
+
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+
+ if (del_timer(&ring->fence_drv.fallback_timer) &&
+ seq != ring->fence_drv.sync_seq)
+ amdgpu_fence_schedule_fallback(ring);
+
+ if (unlikely(seq == last_seq))
+ return false;
+
+ last_seq &= drv->num_fences_mask;
+ seq &= drv->num_fences_mask;
+
+ do {
+ struct dma_fence *fence, **ptr;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[last_seq];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ RCU_INIT_POINTER(*ptr, NULL);
+
+ if (!fence)
+ continue;
+
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ } while (last_seq != seq);
+
+ return true;
+}
+
+/**
+ * amdgpu_fence_fallback - fallback for hardware interrupts
+ *
+ * @t: timer context used to obtain the pointer to ring structure
+ *
+ * Checks for fence activity.
+ */
+static void amdgpu_fence_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring *ring = from_timer(ring, t,
+ fence_drv.fallback_timer);
+
+ if (amdgpu_fence_process(ring))
+ DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+}
+
+/**
+ * amdgpu_fence_wait_empty - wait for all fences to signal
+ *
+ * @ring: ring index the fence is associated with
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns 0 if the fences have passed, error for all other cases.
+ */
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
+{
+ uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
+ struct dma_fence *fence, **ptr;
+ int r;
+
+ if (!seq)
+ return 0;
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !dma_fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_fence_wait_polling - busy wait for givn sequence number
+ *
+ * @ring: ring index the fence is associated with
+ * @wait_seq: sequence number to wait
+ * @timeout: the timeout for waiting in usecs
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns left time if no timeout, 0 or minus if timeout.
+ */
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+ uint32_t wait_seq,
+ signed long timeout)
+{
+
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
+ return timeout > 0 ? timeout : 0;
+}
+/**
+ * amdgpu_fence_count_emitted - get the count of emitted fences
+ *
+ * @ring: ring the fence is associated with
+ *
+ * Get the number of fences emitted on the requested ring (all asics).
+ * Returns the number of emitted fences on the ring. Used by the
+ * dynpm code to ring track activity.
+ */
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+{
+ uint64_t emitted;
+
+ /* We are not protected by ring lock when reading the last sequence
+ * but it's ok to report slightly wrong fence count here.
+ */
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += READ_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
+}
+
+/**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence unsignaled until now, calculate the time delta
+ * between the time fence emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * The function called at the time the fence and related ib is about to
+ * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
+ * with amdgpu_fence_process to modify the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
+ * amdgpu_fence_driver_start_ring - make the fence driver
+ * ready for use on the requested ring.
+ *
+ * @ring: ring to start the fence driver on
+ * @irq_src: interrupt source to use for this ring
+ * @irq_type: interrupt type to use for this ring
+ *
+ * Make the fence driver ready for processing (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has.
+ * Returns 0 for success, errors for failure.
+ */
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t index;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
+ ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
+ ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
+ } else {
+ /* put fence directly behind firmware */
+ index = ALIGN(adev->uvd.fw->size, 8);
+ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
+ }
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
+
+ ring->fence_drv.irq_src = irq_src;
+ ring->fence_drv.irq_type = irq_type;
+ ring->fence_drv.initialized = true;
+
+ DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n",
+ ring->name, ring->fence_drv.gpu_addr);
+ return 0;
+}
+
+/**
+ * amdgpu_fence_driver_init_ring - init the fence driver
+ * for the requested ring.
+ *
+ * @ring: ring to init the fence driver on
+ *
+ * Init the fence driver for the requested ring (all asics).
+ * Helper function for amdgpu_fence_driver_init().
+ */
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (!adev)
+ return -EINVAL;
+
+ if (!is_power_of_2(ring->num_hw_submission))
+ return -EINVAL;
+
+ ring->fence_drv.cpu_addr = NULL;
+ ring->fence_drv.gpu_addr = 0;
+ ring->fence_drv.sync_seq = 0;
+ atomic_set(&ring->fence_drv.last_seq, 0);
+ ring->fence_drv.initialized = false;
+
+ timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
+
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
+ GFP_KERNEL);
+
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * amdgpu_fence_driver_sw_init - init the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Init the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * amdgpu_fence_driver_start_ring().
+ * Returns 0 for success.
+ */
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring that to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
+ * amdgpu_fence_driver_hw_fini - tear down the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Tear down the fence driver for all possible rings (all asics).
+ */
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /* You can't wait for HW to signal if it's gone */
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)))
+ r = amdgpu_fence_wait_empty(ring);
+ else
+ r = -ENODEV;
+ /* no need to trigger GPU reset as we are unloading */
+ if (r)
+ amdgpu_fence_driver_force_completion(ring);
+
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
+ amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+
+ del_timer_sync(&ring->fence_drv.fallback_timer);
+ }
+}
+
+/* Will either stop and flush handlers for amdgpu interrupt or reanble it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
+ continue;
+
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
+ }
+}
+
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
+ drm_sched_fini(&ring->sched);
+
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ dma_fence_put(ring->fence_drv.fences[j]);
+ kfree(ring->fence_drv.fences);
+ ring->fence_drv.fences = NULL;
+ ring->fence_drv.initialized = false;
+ }
+}
+
+/**
+ * amdgpu_fence_driver_hw_init - enable the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Enable the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * amdgpu_fence_driver_start_ring().
+ * Returns 0 for success.
+ */
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /* enable the interrupt */
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
+ amdgpu_irq_get(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+ }
+}
+
+/**
+ * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
+ *
+ * @ring: fence of the ring to be cleared
+ *
+ */
+void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
+{
+ int i;
+ struct dma_fence *old, **ptr;
+
+ for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
+ ptr = &ring->fence_drv.fences[i];
+ old = rcu_dereference_protected(*ptr, 1);
+ if (old && old->ops == &amdgpu_job_fence_ops) {
+ struct amdgpu_job *job;
+
+ /* For non-scheduler bad job, i.e. failed ib test, we need to signal
+ * it right here or we won't be able to track them in fence_drv
+ * and they will remain unsignaled during sa_bo free.
+ */
+ job = container_of(old, struct amdgpu_job, hw_fence);
+ if (!job->base.s_fence && !dma_fence_is_signaled(old))
+ dma_fence_signal(old);
+ RCU_INIT_POINTER(*ptr, NULL);
+ dma_fence_put(old);
+ }
+ }
+}
+
+/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
+ * amdgpu_fence_driver_force_completion - force signal latest fence of ring
+ *
+ * @ring: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
+{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
+ amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
+ amdgpu_fence_process(ring);
+}
+
+/*
+ * Common fence implementation
+ */
+
+static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu";
+}
+
+static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
+{
+ return (const char *)to_amdgpu_fence(f)->ring->name;
+}
+
+static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+
+ return (const char *)to_amdgpu_ring(job->base.sched)->name;
+}
+
+/**
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @f: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
+{
+ if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
+
+ return true;
+}
+
+/**
+ * amdgpu_job_fence_enable_signaling - enable signalling on job fence
+ * @f: fence
+ *
+ * This is the simliar function with amdgpu_fence_enable_signaling above, it
+ * only handles the job embedded fence.
+ */
+static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+
+ if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
+
+ return true;
+}
+
+/**
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the fence memory after the RCU grace period.
+ */
+static void amdgpu_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+
+ /* free fence_slab if it's separated fence*/
+ kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
+}
+
+/**
+ * amdgpu_job_fence_free - free up the job with embedded fence
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the job with embedded fence after the RCU grace period.
+ */
+static void amdgpu_job_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+
+ /* free job if fence has a parent job */
+ kfree(container_of(f, struct amdgpu_job, hw_fence));
+}
+
+/**
+ * amdgpu_fence_release - callback that fence can be freed
+ *
+ * @f: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amdgpu_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_fence_free);
+}
+
+/**
+ * amdgpu_job_fence_release - callback that job embedded fence can be freed
+ *
+ * @f: fence
+ *
+ * This is the simliar function with amdgpu_fence_release above, it
+ * only handles the job embedded fence.
+ */
+static void amdgpu_job_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_job_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_fence_ops = {
+ .get_driver_name = amdgpu_fence_get_driver_name,
+ .get_timeline_name = amdgpu_fence_get_timeline_name,
+ .enable_signaling = amdgpu_fence_enable_signaling,
+ .release = amdgpu_fence_release,
+};
+
+static const struct dma_fence_ops amdgpu_job_fence_ops = {
+ .get_driver_name = amdgpu_fence_get_driver_name,
+ .get_timeline_name = amdgpu_job_fence_get_timeline_name,
+ .enable_signaling = amdgpu_job_fence_enable_signaling,
+ .release = amdgpu_job_fence_release,
+};
+
+/*
+ * Fence debugfs
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ amdgpu_fence_process(ring);
+
+ seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
+ seq_printf(m, "Last signaled fence 0x%08x\n",
+ atomic_read(&ring->fence_drv.last_seq));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->fence_drv.sync_seq);
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
+ ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
+ seq_printf(m, "Last signaled trailing fence 0x%08x\n",
+ le32_to_cpu(*ring->trail_fence_cpu_addr));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->trail_seq);
+ }
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+ continue;
+
+ /* set in CP_VMID_PREEMPT and preemption occurred */
+ seq_printf(m, "Last preempted 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
+ /* set in CP_VMID_RESET and reset occurred */
+ seq_printf(m, "Last reset 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
+ /* Both preemption and reset occurred */
+ seq_printf(m, "Last both 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
+ }
+ return 0;
+}
+
+/*
+ * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
+ *
+ * Manually trigger a gpu reset at the next fence wait.
+ */
+static int gpu_recover_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return 0;
+ }
+
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
+ "%lld\n");
+
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+#endif
+
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
+ &amdgpu_debugfs_fence_info_fops);
+
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
+ debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
+ &amdgpu_debugfs_gpu_recover_fops);
+ }
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
new file mode 100644
index 000000000..7cd0dfaee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "smu_v11_0_i2c.h"
+#include "atom.h"
+#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_eeprom.h"
+
+#define FRU_EEPROM_MADDR_6 0x60000
+#define FRU_EEPROM_MADDR_8 0x80000
+
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
+{
+ /* Only server cards have the FRU EEPROM
+ * TODO: See if we can figure this out dynamically instead of
+ * having to parse VBIOS versions.
+ */
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
+
+ /* The i2c access is blocked on VF
+ * TODO: Need other way to get the info
+ */
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ /* The default I2C EEPROM address of the FRU.
+ */
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+
+ /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,
+ * we can use just the "DXXX" portion. If there were more models, we
+ * could convert the 3 characters to a hex integer and use a switch
+ * for ease/speed/readability. For now, 2 string comparisons are
+ * reasonable and not too expensive
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* D161 and D163 are the VG20 server SKUs */
+ if (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn))) {
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ } else {
+ return false;
+ }
+ case CHIP_ALDEBARAN:
+ /* All Aldebaran SKUs have an FRU */
+ if (!strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ case CHIP_SIENNA_CICHLID:
+ if (strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
+ if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
+ sizeof(atom_ctx->vbios_pn))) {
+ return false;
+ }
+
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+
+ } else {
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
+{
+ unsigned char buf[8], *pia;
+ u32 addr, fru_addr;
+ int size, len;
+ u8 csum;
+
+ if (!is_fru_eeprom_supported(adev, &fru_addr))
+ return 0;
+
+ /* If algo exists, it means that the i2c_adapter's initialized */
+ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) {
+ DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
+ return -ENODEV;
+ }
+
+ /* Read the IPMI Common header */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
+ sizeof(buf));
+ if (len != 8) {
+ DRM_ERROR("Couldn't read the IPMI Common Header: %d", len);
+ return len < 0 ? len : -EIO;
+ }
+
+ if (buf[0] != 1) {
+ DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]);
+ return -EIO;
+ }
+
+ for (csum = 0; len > 0; len--)
+ csum += buf[len - 1];
+ if (csum) {
+ DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum);
+ return -EIO;
+ }
+
+ /* Get the offset to the Product Info Area (PIA). */
+ addr = buf[4] * 8;
+ if (!addr)
+ return 0;
+
+ /* Get the absolute address to the PIA. */
+ addr += fru_addr;
+
+ /* Read the header of the PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
+ if (len != 3) {
+ DRM_ERROR("Couldn't read the Product Info Area header: %d", len);
+ return len < 0 ? len : -EIO;
+ }
+
+ if (buf[0] != 1) {
+ DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]);
+ return -EIO;
+ }
+
+ size = buf[1] * 8;
+ pia = kzalloc(size, GFP_KERNEL);
+ if (!pia)
+ return -ENOMEM;
+
+ /* Read the whole PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
+ if (len != size) {
+ kfree(pia);
+ DRM_ERROR("Couldn't read the Product Info Area: %d", len);
+ return len < 0 ? len : -EIO;
+ }
+
+ for (csum = 0; size > 0; size--)
+ csum += pia[size - 1];
+ if (csum) {
+ DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum);
+ kfree(pia);
+ return -EIO;
+ }
+
+ /* Now extract useful information from the PIA.
+ *
+ * Skip the Manufacturer Name at [3] and go directly to
+ * the Product Name field.
+ */
+ addr = 3 + 1 + (pia[3] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(adev->product_name, pia + addr + 1,
+ min_t(size_t,
+ sizeof(adev->product_name),
+ pia[addr] & 0x3F));
+ adev->product_name[sizeof(adev->product_name) - 1] = '\0';
+
+ /* Go to the Product Part/Model Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(adev->product_number, pia + addr + 1,
+ min_t(size_t,
+ sizeof(adev->product_number),
+ pia[addr] & 0x3F));
+ adev->product_number[sizeof(adev->product_number) - 1] = '\0';
+
+ /* Go to the Product Version field. */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* Go to the Product Serial Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(adev->serial, pia + addr + 1, min_t(size_t,
+ sizeof(adev->serial),
+ pia[addr] & 0x3F));
+ adev->serial[sizeof(adev->serial) - 1] = '\0';
+Out:
+ kfree(pia);
+ return 0;
+}
+
+/**
+ * DOC: product_name
+ *
+ * The amdgpu driver provides a sysfs API for reporting the product name
+ * for the device
+ * The file product_name is used for this and returns the product name
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->product_name);
+}
+
+static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
+
+/**
+ * DOC: product_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the part number
+ * for the device
+ * The file product_number is used for this and returns the part number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->product_number);
+}
+
+static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
+
+/**
+ * DOC: serial_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the serial number
+ * for the device
+ * The file serial_number is used for this and returns the serial number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->serial);
+}
+
+static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
+
+static const struct attribute *amdgpu_fru_attributes[] = {
+ &dev_attr_product_name.attr,
+ &dev_attr_product_number.attr,
+ &dev_attr_serial_number.attr,
+ NULL
+};
+
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!is_fru_eeprom_supported(adev, NULL))
+ return 0;
+
+ return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
+
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!is_fru_eeprom_supported(adev, NULL))
+ return;
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
new file mode 100644
index 000000000..c817db17c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_FRU_EEPROM_H__
+#define __AMDGPU_FRU_EEPROM_H__
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);
+
+#endif // __AMDGPU_FRU_EEPROM_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
new file mode 100644
index 000000000..2d4b67175
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/firmware.h>
+#include <linux/dma-mapping.h>
+
+#include "amdgpu.h"
+#include "amdgpu_fw_attestation.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+
+#define FW_ATTESTATION_DB_COOKIE 0x143b6a37
+#define FW_ATTESTATION_RECORD_VALID 1
+#define FW_ATTESTATION_MAX_SIZE 4096
+
+struct FW_ATT_DB_HEADER {
+ uint32_t AttDbVersion; /* version of the fwar feature */
+ uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */
+};
+
+struct FW_ATT_RECORD {
+ uint16_t AttFwIdV1; /* Legacy FW Type field */
+ uint16_t AttFwIdV2; /* V2 FW ID field */
+ uint32_t AttFWVersion; /* FW Version */
+ uint16_t AttFWActiveFunctionID; /* The VF ID (only in VF Attestation Table) */
+ uint8_t AttSource; /* FW source indicator */
+ uint8_t RecordValid; /* Indicates whether the record is a valid entry */
+ uint32_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */
+};
+
+static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
+ char __user *buf,
+ size_t size,
+ loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ uint64_t records_addr = 0;
+ uint64_t vram_pos = 0;
+ struct FW_ATT_DB_HEADER fw_att_hdr = {0};
+ struct FW_ATT_RECORD fw_att_record = {0};
+
+ if (size < sizeof(struct FW_ATT_RECORD)) {
+ DRM_WARN("FW attestation input buffer not enough memory");
+ return -EINVAL;
+ }
+
+ if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
+ DRM_WARN("FW attestation out of bounds");
+ return 0;
+ }
+
+ if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) {
+ DRM_WARN("Failed to get FW attestation record address");
+ return -EINVAL;
+ }
+
+ vram_pos = records_addr - adev->gmc.vram_start;
+
+ if (*pos == 0) {
+ amdgpu_device_vram_access(adev,
+ vram_pos,
+ (uint32_t *)&fw_att_hdr,
+ sizeof(struct FW_ATT_DB_HEADER),
+ false);
+
+ if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
+ DRM_WARN("Invalid FW attestation cookie");
+ return -EINVAL;
+ }
+
+ DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion);
+ }
+
+ amdgpu_device_vram_access(adev,
+ vram_pos + sizeof(struct FW_ATT_DB_HEADER) + *pos,
+ (uint32_t *)&fw_att_record,
+ sizeof(struct FW_ATT_RECORD),
+ false);
+
+ if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
+ return 0;
+
+ if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct FW_ATT_RECORD)))
+ return -EINVAL;
+
+ *pos += sizeof(struct FW_ATT_RECORD);
+
+ return sizeof(struct FW_ATT_RECORD);
+}
+
+static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_fw_attestation_debugfs_read,
+ .write = NULL,
+ .llseek = default_llseek
+};
+
+static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ return 1;
+
+ return 0;
+}
+
+void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev)
+{
+ if (!amdgpu_is_fw_attestation_supported(adev))
+ return;
+
+ debugfs_create_file("amdgpu_fw_attestation",
+ 0400,
+ adev_to_drm(adev)->primary->debugfs_root,
+ adev,
+ &amdgpu_fw_attestation_debugfs_ops);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h
new file mode 100644
index 000000000..90af4fe58
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_FW_ATTESTATION_H
+#define _AMDGPU_FW_ATTESTATION_H
+
+#include "amdgpu.h"
+
+void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
new file mode 100644
index 000000000..73b8cca35
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include <drm/amdgpu_drm.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
+#include "amdgpu.h"
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+/*
+ * GART
+ * The GART (Graphics Aperture Remapping Table) is an aperture
+ * in the GPU's address space. System pages can be mapped into
+ * the aperture and look like contiguous pages from the GPU's
+ * perspective. A page table maps the pages in the aperture
+ * to the actual backing pages in system memory.
+ *
+ * Radeon GPUs support both an internal GART, as described above,
+ * and AGP. AGP works similarly, but the GART table is configured
+ * and maintained by the northbridge rather than the driver.
+ * Radeon hw has a separate AGP aperture that is programmed to
+ * point to the AGP aperture provided by the northbridge and the
+ * requests are passed through to the northbridge aperture.
+ * Both AGP and internal GART can be used at the same time, however
+ * that is not currently supported by the driver.
+ *
+ * This file handles the common internal GART management.
+ */
+
+/*
+ * Common GART table functions.
+ */
+
+/**
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the dummy page used by the driver (all asics).
+ * This dummy page is used by the driver as a filler for gart entries
+ * when pages are taken out of the GART
+ * Returns 0 on sucess, -ENOMEM on failure.
+ */
+static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
+{
+ struct page *dummy_page = ttm_glob.dummy_read_page;
+
+ if (adev->dummy_page_addr)
+ return 0;
+ adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
+ adev->dummy_page_addr = 0;
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the dummy page used by the driver (all asics).
+ */
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+{
+ if (!adev->dummy_page_addr)
+ return;
+ dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = 0;
+}
+
+/**
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ struct amdgpu_bo *bo = NULL;
+ struct sg_table *sg = NULL;
+ struct amdgpu_bo_param bp;
+ dma_addr_t dma_addr;
+ struct page *p;
+ int ret;
+
+ if (adev->gart.bo != NULL)
+ return 0;
+
+ p = alloc_pages(gfp_flags, order);
+ if (!p)
+ return -ENOMEM;
+
+ /* If the hardware does not support UTCL2 snooping of the CPU caches
+ * then set_memory_wc() could be used as a workaround to mark the pages
+ * as write combine memory.
+ */
+ dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+ __free_pages(p, order);
+ p = NULL;
+ return -EFAULT;
+ }
+
+ dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+ /* Create SG table */
+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ if (ret)
+ goto error;
+
+ sg_dma_address(sg->sgl) = dma_addr;
+ sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = adev->gart.table_size;
+#endif
+ /* Create SG BO */
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.flags = 0;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+ goto error;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ WARN(ret, "Pinning the GART table failed");
+ if (ret)
+ goto error_resv;
+
+ adev->gart.bo = bo;
+ adev->gart.ptr = page_to_virt(p);
+ /* Make GART table accessible in VMID0 */
+ ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+ if (ret)
+ amdgpu_gart_table_ram_free(adev);
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+error_resv:
+ amdgpu_bo_unreserve(bo);
+error:
+ amdgpu_bo_unref(&bo);
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ __free_pages(p, order);
+ return ret;
+}
+
+/**
+ * amdgpu_gart_table_ram_free - free gart page table system ram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the system memory used for the GART page tableon ASICs that don't
+ * have dedicated VRAM.
+ */
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ struct sg_table *sg = adev->gart.bo->tbo.sg;
+ struct page *p;
+ int ret;
+
+ ret = amdgpu_bo_reserve(adev->gart.bo, false);
+ if (!ret) {
+ amdgpu_bo_unpin(adev->gart.bo);
+ amdgpu_bo_unreserve(adev->gart.bo);
+ }
+ amdgpu_bo_unref(&adev->gart.bo);
+ sg_free_table(sg);
+ kfree(sg);
+ p = virt_to_page(adev->gart.ptr);
+ __free_pages(p, order);
+
+ adev->gart.ptr = NULL;
+}
+
+/**
+ * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for GART page table
+ * (pcie r4xx, r5xx+). These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
+{
+ if (adev->gart.bo != NULL)
+ return 0;
+
+ return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_table_vram_free - free gart page table vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the video memory used for the GART page table
+ * (pcie r4xx, r5xx+). These asics require the gart table to
+ * be in video memory.
+ */
+void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
+}
+
+/*
+ * Common gart functions.
+ */
+/**
+ * amdgpu_gart_unbind - unbind pages from the gart page table
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to unbind
+ *
+ * Unbinds the requested pages from the gart page table and
+ * replaces them with the dummy page (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages)
+{
+ unsigned t;
+ unsigned p;
+ int i, j;
+ u64 page_base;
+ /* Starting from VEGA10, system bit must be 0 to mean invalid. */
+ uint64_t flags = 0;
+ int idx;
+
+ if (!adev->gart.ptr)
+ return;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ t = offset / AMDGPU_GPU_PAGE_SIZE;
+ p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ for (i = 0; i < pages; i++, p++) {
+ page_base = adev->dummy_page_addr;
+ if (!adev->gart.ptr)
+ continue;
+
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ t, page_base, flags);
+ page_base += AMDGPU_GPU_PAGE_SIZE;
+ }
+ }
+ mb();
+ amdgpu_device_flush_hdp(adev, NULL);
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_map - map dma_addresses into GART entries
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
+ * @dst: CPU address of the gart table
+ *
+ * Map the dma_addresses into GART entries (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst)
+{
+ uint64_t page_base;
+ unsigned i, j, t;
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ t = offset / AMDGPU_GPU_PAGE_SIZE;
+
+ for (i = 0; i < pages; i++) {
+ page_base = dma_addr[i];
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+ amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
+ page_base += AMDGPU_GPU_PAGE_SIZE;
+ }
+ }
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_bind - bind pages into the gart page table
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
+ *
+ * Binds the requested pages to the gart page table
+ * (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr,
+ uint64_t flags)
+{
+ if (!adev->gart.ptr)
+ return;
+
+ amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu device driver pointer
+ *
+ * Invalidate gart TLB which can be use as a way to flush gart changes
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gart.ptr)
+ return;
+
+ mb();
+ amdgpu_device_flush_hdp(adev, NULL);
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+}
+
+/**
+ * amdgpu_gart_init - init the driver info for managing the gart
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the dummy page and init the gart driver info (all asics).
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->dummy_page_addr)
+ return 0;
+
+ /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
+ if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("Page size is smaller than GPU page size!\n");
+ return -EINVAL;
+ }
+ r = amdgpu_gart_dummy_page_init(adev);
+ if (r)
+ return r;
+ /* Compute table size */
+ adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
+ adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
+ DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
+ adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
new file mode 100644
index 000000000..8283d682f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GART_H__
+#define __AMDGPU_GART_H__
+
+#include <linux/types.h>
+
+/*
+ * GART structures, functions & helpers
+ */
+struct amdgpu_device;
+struct amdgpu_bo;
+
+#define AMDGPU_GPU_PAGE_SIZE 4096
+#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
+#define AMDGPU_GPU_PAGE_SHIFT 12
+#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
+
+#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
+
+struct amdgpu_gart {
+ struct amdgpu_bo *bo;
+ /* CPU kmapped address of gart table */
+ void *ptr;
+ unsigned num_gpu_pages;
+ unsigned num_cpu_pages;
+ unsigned table_size;
+
+ /* Asic default pte flags */
+ uint64_t gart_pte_flags;
+};
+
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
+int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
+void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
+int amdgpu_gart_init(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages);
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst);
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
new file mode 100644
index 000000000..f6ac1e954
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GDS_H__
+#define __AMDGPU_GDS_H__
+
+struct amdgpu_ring;
+struct amdgpu_bo;
+
+struct amdgpu_gds {
+ uint32_t gds_size;
+ uint32_t gws_size;
+ uint32_t oa_size;
+ uint32_t gds_compute_max_wave_id;
+};
+
+struct amdgpu_gds_reg_offset {
+ uint32_t mem_base;
+ uint32_t mem_size;
+ uint32_t gws;
+ uint32_t oa;
+};
+
+#endif /* __AMDGPU_GDS_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
new file mode 100644
index 000000000..a1b15d0d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "amdgpu.h"
+#include "amdgpu_display.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_xgmi.h"
+
+static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
+static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
+{
+ struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
+ struct drm_device *ddev = bo->base.dev;
+ vm_fault_t ret;
+ int idx;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(ddev, &idx)) {
+ ret = amdgpu_bo_fault_reserve_notify(bo);
+ if (ret) {
+ drm_dev_exit(idx);
+ goto unlock;
+ }
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
+
+static const struct vm_operations_struct amdgpu_gem_vm_ops = {
+ .fault = amdgpu_gem_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access
+};
+
+static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
+{
+ struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
+
+ if (robj) {
+ amdgpu_hmm_unregister(robj);
+ amdgpu_bo_unref(&robj);
+ }
+}
+
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+ int alignment, u32 initial_domain,
+ u64 flags, enum ttm_bo_type type,
+ struct dma_resv *resv,
+ struct drm_gem_object **obj, int8_t xcp_id_plus1)
+{
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_user *ubo;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ *obj = NULL;
+
+ bp.size = size;
+ bp.byte_align = alignment;
+ bp.type = type;
+ bp.resv = resv;
+ bp.preferred_domain = initial_domain;
+ bp.flags = flags;
+ bp.domain = initial_domain;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id_plus1;
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ if (r)
+ return r;
+
+ bo = &ubo->bo;
+ *obj = &bo->tbo.base;
+ (*obj)->funcs = &amdgpu_gem_object_funcs;
+
+ return 0;
+}
+
+void amdgpu_gem_force_release(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct drm_file *file;
+
+ mutex_lock(&ddev->filelist_mutex);
+
+ list_for_each_entry(file, &ddev->filelist, lhead) {
+ struct drm_gem_object *gobj;
+ int handle;
+
+ WARN_ONCE(1, "Still active user space clients!\n");
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, handle) {
+ WARN_ONCE(1, "And also active allocations!\n");
+ drm_gem_object_put(gobj);
+ }
+ idr_destroy(&file->object_idr);
+ spin_unlock(&file->table_lock);
+ }
+
+ mutex_unlock(&ddev->filelist_mutex);
+}
+
+/*
+ * Call from drm_gem_handle_create which appear in both new and open ioctl
+ * case.
+ */
+static int amdgpu_gem_object_open(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va *bo_va;
+ struct mm_struct *mm;
+ int r;
+
+ mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
+ if (mm && mm != current->mm)
+ return -EPERM;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
+ abo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ return -EPERM;
+
+ r = amdgpu_bo_reserve(abo, false);
+ if (r)
+ return r;
+
+ bo_va = amdgpu_vm_bo_find(vm, abo);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, abo);
+ else
+ ++bo_va->ref_count;
+ amdgpu_bo_unreserve(abo);
+ return 0;
+}
+
+static void amdgpu_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
+ long r;
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+ }
+
+ bo_va = amdgpu_vm_bo_find(vm, bo);
+ if (!bo_va || --bo_va->ref_count)
+ goto out_unlock;
+
+ amdgpu_vm_bo_del(adev, bo_va);
+ if (!amdgpu_vm_ready(vm))
+ goto out_unlock;
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%ld)\n", r);
+ if (r || !fence)
+ goto out_unlock;
+
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+out_unlock:
+ if (r)
+ dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+ drm_exec_fini(&exec);
+}
+
+static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+ return -EPERM;
+ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ return -EPERM;
+
+ /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+ * for debugger access to invisible VRAM. Should have used MAP_SHARED
+ * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+ * becoming writable and makes is_cow_mapping(vm_flags) false.
+ */
+ if (is_cow_mapping(vma->vm_flags) &&
+ !(vma->vm_flags & VM_ACCESS_FLAGS))
+ vm_flags_clear(vma, VM_MAYWRITE);
+
+ return drm_gem_ttm_mmap(obj, vma);
+}
+
+static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+ .free = amdgpu_gem_object_free,
+ .open = amdgpu_gem_object_open,
+ .close = amdgpu_gem_object_close,
+ .export = amdgpu_gem_prime_export,
+ .vmap = drm_gem_ttm_vmap,
+ .vunmap = drm_gem_ttm_vunmap,
+ .mmap = amdgpu_gem_object_mmap,
+ .vm_ops = &amdgpu_gem_vm_ops,
+};
+
+/*
+ * GEM ioctls.
+ */
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ union drm_amdgpu_gem_create *args = data;
+ uint64_t flags = args->in.domain_flags;
+ uint64_t size = args->in.bo_size;
+ struct dma_resv *resv = NULL;
+ struct drm_gem_object *gobj;
+ uint32_t handle, initial_domain;
+ int r;
+
+ /* reject DOORBELLs until userspace code to use it is available */
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
+ return -EINVAL;
+
+ /* reject invalid gem flags */
+ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_VRAM_CLEARED |
+ AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
+ AMDGPU_GEM_CREATE_ENCRYPTED |
+ AMDGPU_GEM_CREATE_DISCARDABLE))
+ return -EINVAL;
+
+ /* reject invalid gem domains */
+ if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
+ return -EINVAL;
+
+ if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
+ DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
+ return -EINVAL;
+ }
+
+ /* create a gem object to contain this object in */
+ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
+ AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ /* if gds bo is created from user space, it must be
+ * passed to bo list
+ */
+ DRM_ERROR("GDS bo cannot be per-vm-bo\n");
+ return -EINVAL;
+ }
+ flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ }
+
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ resv = vm->root.bo->tbo.base.resv;
+ }
+
+ initial_domain = (u32)(0xffffffff & args->in.domains);
+retry:
+ r = amdgpu_gem_object_create(adev, size, args->in.alignment,
+ initial_domain,
+ flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
+ if (r && r != -ERESTARTSYS) {
+ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+ flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ goto retry;
+ }
+
+ if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+ goto retry;
+ }
+ DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+ size, initial_domain, args->in.alignment, r);
+ }
+
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ if (!r) {
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+ abo->parent = amdgpu_bo_ref(vm->root.bo);
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+ if (r)
+ return r;
+
+ r = drm_gem_handle_create(filp, gobj, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(gobj);
+ if (r)
+ return r;
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = handle;
+ return 0;
+}
+
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_gem_userptr *args = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct drm_gem_object *gobj;
+ struct hmm_range *range;
+ struct amdgpu_bo *bo;
+ uint32_t handle;
+ int r;
+
+ args->addr = untagged_addr(args->addr);
+
+ if (offset_in_page(args->addr | args->size))
+ return -EINVAL;
+
+ /* reject unknown flag values */
+ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY |
+ AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE |
+ AMDGPU_GEM_USERPTR_REGISTER))
+ return -EINVAL;
+
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
+
+ /* if we want to write to it we must install a MMU notifier */
+ return -EACCES;
+ }
+
+ /* create a gem object to contain this object in */
+ r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+ 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
+ if (r)
+ return r;
+
+ bo = gem_to_amdgpu_bo(gobj);
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ r = amdgpu_ttm_tt_set_userptr(&bo->tbo, args->addr, args->flags);
+ if (r)
+ goto release_object;
+
+ r = amdgpu_hmm_register(bo, args->addr);
+ if (r)
+ goto release_object;
+
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
+ r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
+ &range);
+ if (r)
+ goto release_object;
+
+ r = amdgpu_bo_reserve(bo, true);
+ if (r)
+ goto user_pages_done;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (r)
+ goto user_pages_done;
+ }
+
+ r = drm_gem_handle_create(filp, gobj, &handle);
+ if (r)
+ goto user_pages_done;
+
+ args->handle = handle;
+
+user_pages_done:
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+
+release_object:
+ drm_gem_object_put(gobj);
+
+ return r;
+}
+
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p)
+{
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+
+ gobj = drm_gem_object_lookup(filp, handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
+ (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
+ drm_gem_object_put(gobj);
+ return -EPERM;
+ }
+ *offset_p = amdgpu_bo_mmap_offset(robj);
+ drm_gem_object_put(gobj);
+ return 0;
+}
+
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_gem_mmap *args = data;
+ uint32_t handle = args->in.handle;
+
+ memset(args, 0, sizeof(*args));
+ return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr);
+}
+
+/**
+ * amdgpu_gem_timeout - calculate jiffies timeout from absolute value
+ *
+ * @timeout_ns: timeout in ns
+ *
+ * Calculate the timeout in jiffies from an absolute timeout in ns.
+ */
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
+{
+ unsigned long timeout_jiffies;
+ ktime_t timeout;
+
+ /* clamp timeout if it's to large */
+ if (((int64_t)timeout_ns) < 0)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
+ if (ktime_to_ns(timeout) < 0)
+ return 0;
+
+ timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
+ /* clamp timeout to avoid unsigned-> signed overflow */
+ if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
+ return MAX_SCHEDULE_TIMEOUT - 1;
+
+ return timeout_jiffies;
+}
+
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_gem_wait_idle *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+ uint32_t handle = args->in.handle;
+ unsigned long timeout = amdgpu_gem_timeout(args->in.timeout);
+ int r = 0;
+ long ret;
+
+ gobj = drm_gem_object_lookup(filp, handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+ ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+ true, timeout);
+
+ /* ret == 0 means not signaled,
+ * ret > 0 means signaled
+ * ret < 0 means interrupted before timeout
+ */
+ if (ret >= 0) {
+ memset(args, 0, sizeof(*args));
+ args->out.status = (ret == 0);
+ } else
+ r = ret;
+
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_metadata *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+ int r = -1;
+
+ DRM_DEBUG("%d\n", args->handle);
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (gobj == NULL)
+ return -ENOENT;
+ robj = gem_to_amdgpu_bo(gobj);
+
+ r = amdgpu_bo_reserve(robj, false);
+ if (unlikely(r != 0))
+ goto out;
+
+ if (args->op == AMDGPU_GEM_METADATA_OP_GET_METADATA) {
+ amdgpu_bo_get_tiling_flags(robj, &args->data.tiling_info);
+ r = amdgpu_bo_get_metadata(robj, args->data.data,
+ sizeof(args->data.data),
+ &args->data.data_size_bytes,
+ &args->data.flags);
+ } else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) {
+ if (args->data.data_size_bytes > sizeof(args->data.data)) {
+ r = -EINVAL;
+ goto unreserve;
+ }
+ r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info);
+ if (!r)
+ r = amdgpu_bo_set_metadata(robj, args->data.data,
+ args->data.data_size_bytes,
+ args->data.flags);
+ }
+
+unreserve:
+ amdgpu_bo_unreserve(robj);
+out:
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+/**
+ * amdgpu_gem_va_update_vm -update the bo_va in its VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm to update
+ * @bo_va: bo_va to update
+ * @operation: map, unmap or clear
+ *
+ * Update the bo_va directly after setting its address. Errors are not
+ * vital here, so they are not reported back to userspace.
+ */
+static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
+{
+ int r;
+
+ if (!amdgpu_vm_ready(vm))
+ return;
+
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (r)
+ goto error;
+
+ if (operation == AMDGPU_VA_OP_MAP ||
+ operation == AMDGPU_VA_OP_REPLACE) {
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ goto error;
+ }
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+
+error:
+ if (r && r != -ERESTARTSYS)
+ DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+}
+
+/**
+ * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: GEM UAPI flags
+ *
+ * Returns the GEM UAPI flags mapped into hardware for the ASIC.
+ */
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+ if (flags & AMDGPU_VM_PAGE_NOALLOC)
+ pte_flag |= AMDGPU_PTE_NOALLOC;
+
+ if (adev->gmc.gmc_funcs->map_mtype)
+ pte_flag |= amdgpu_gmc_map_mtype(adev,
+ flags & AMDGPU_VM_MTYPE_MASK);
+
+ return pte_flag;
+}
+
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+ AMDGPU_VM_PAGE_NOALLOC;
+ const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_PRT;
+
+ struct drm_amdgpu_gem_va *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_bo *abo;
+ struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
+ uint64_t va_flags;
+ uint64_t vm_size;
+ int r = 0;
+
+ if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in reserved area 0x%llx\n",
+ args->va_address, AMDGPU_VA_RESERVED_SIZE);
+ return -EINVAL;
+ }
+
+ if (args->va_address >= AMDGPU_GMC_HOLE_START &&
+ args->va_address < AMDGPU_GMC_HOLE_END) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n",
+ args->va_address, AMDGPU_GMC_HOLE_START,
+ AMDGPU_GMC_HOLE_END);
+ return -EINVAL;
+ }
+
+ args->va_address &= AMDGPU_GMC_HOLE_MASK;
+
+ vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ if (args->va_address + args->map_size > vm_size) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in top reserved area 0x%llx\n",
+ args->va_address + args->map_size, vm_size);
+ return -EINVAL;
+ }
+
+ if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
+ dev_dbg(dev->dev, "invalid flags combination 0x%08X\n",
+ args->flags);
+ return -EINVAL;
+ }
+
+ switch (args->operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ case AMDGPU_VA_OP_REPLACE:
+ break;
+ default:
+ dev_dbg(dev->dev, "unsupported operation %d\n",
+ args->operation);
+ return -EINVAL;
+ }
+
+ if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
+ !(args->flags & AMDGPU_VM_PAGE_PRT)) {
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (gobj == NULL)
+ return -ENOENT;
+ abo = gem_to_amdgpu_bo(gobj);
+ } else {
+ gobj = NULL;
+ abo = NULL;
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_until_all_locked(&exec) {
+ if (gobj) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ if (abo) {
+ bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
+ if (!bo_va) {
+ r = -ENOENT;
+ goto error;
+ }
+ } else if (args->operation != AMDGPU_VA_OP_CLEAR) {
+ bo_va = fpriv->prt_va;
+ } else {
+ bo_va = NULL;
+ }
+
+ switch (args->operation) {
+ case AMDGPU_VA_OP_MAP:
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
+ r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
+ args->offset_in_bo, args->map_size,
+ va_flags);
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
+ break;
+
+ case AMDGPU_VA_OP_CLEAR:
+ r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
+ args->va_address,
+ args->map_size);
+ break;
+ case AMDGPU_VA_OP_REPLACE:
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
+ r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
+ args->offset_in_bo, args->map_size,
+ va_flags);
+ break;
+ default:
+ break;
+ }
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
+ amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+error:
+ drm_exec_fini(&exec);
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_gem_op *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_vm_bo_base *base;
+ struct amdgpu_bo *robj;
+ int r;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+
+ r = amdgpu_bo_reserve(robj, false);
+ if (unlikely(r))
+ goto out;
+
+ switch (args->op) {
+ case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
+ struct drm_amdgpu_gem_create_in info;
+ void __user *out = u64_to_user_ptr(args->value);
+
+ info.bo_size = robj->tbo.base.size;
+ info.alignment = robj->tbo.page_alignment << PAGE_SHIFT;
+ info.domains = robj->preferred_domains;
+ info.domain_flags = robj->flags;
+ amdgpu_bo_unreserve(robj);
+ if (copy_to_user(out, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+ case AMDGPU_GEM_OP_SET_PLACEMENT:
+ if (robj->tbo.base.import_attach &&
+ args->value & AMDGPU_GEM_DOMAIN_VRAM) {
+ r = -EINVAL;
+ amdgpu_bo_unreserve(robj);
+ break;
+ }
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
+ r = -EPERM;
+ amdgpu_bo_unreserve(robj);
+ break;
+ }
+ for (base = robj->vm_bo; base; base = base->next)
+ if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev),
+ amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) {
+ r = -EINVAL;
+ amdgpu_bo_unreserve(robj);
+ goto out;
+ }
+
+
+ robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ robj->allowed_domains = robj->preferred_domains;
+ if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
+ if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ amdgpu_vm_bo_invalidate(adev, robj, true);
+
+ amdgpu_bo_unreserve(robj);
+ break;
+ default:
+ amdgpu_bo_unreserve(robj);
+ r = -EINVAL;
+ }
+
+out:
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
+ int width,
+ int cpp,
+ bool tiled)
+{
+ int aligned = width;
+ int pitch_mask = 0;
+
+ switch (cpp) {
+ case 1:
+ pitch_mask = 255;
+ break;
+ case 2:
+ pitch_mask = 127;
+ break;
+ case 3:
+ case 4:
+ pitch_mask = 63;
+ break;
+ }
+
+ aligned += pitch_mask;
+ aligned &= ~pitch_mask;
+ return aligned * cpp;
+}
+
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct drm_gem_object *gobj;
+ uint32_t handle;
+ u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ u32 domain;
+ int r;
+
+ /*
+ * The buffer returned from this function should be cleared, but
+ * it can only be done if the ring is enabled or we'll fail to
+ * create the buffer.
+ */
+ if (adev->mman.buffer_funcs_enabled)
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ args->pitch = amdgpu_gem_align_pitch(adev, args->width,
+ DIV_ROUND_UP(args->bpp, 8), 0);
+ args->size = (u64)args->pitch * args->height;
+ args->size = ALIGN(args->size, PAGE_SIZE);
+ domain = amdgpu_bo_get_preferred_domain(adev,
+ amdgpu_display_supported_domains(adev, flags));
+ r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
+ ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
+ if (r)
+ return -ENOMEM;
+
+ r = drm_gem_handle_create(file_priv, gobj, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(gobj);
+ if (r)
+ return r;
+
+ args->handle = handle;
+ return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_file *file;
+ int r;
+
+ r = mutex_lock_interruptible(&dev->filelist_mutex);
+ if (r)
+ return r;
+
+ list_for_each_entry(file, &dev->filelist, lhead) {
+ struct task_struct *task;
+ struct drm_gem_object *gobj;
+ struct pid *pid;
+ int id;
+
+ /*
+ * Although we have a valid reference on file->pid, that does
+ * not guarantee that the task_struct who called get_pid() is
+ * still alive (e.g. get_pid(current) => fork() => exit()).
+ * Therefore, we need to protect this ->comm access using RCU.
+ */
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
+ task ? task->comm : "<unknown>");
+ rcu_read_unlock();
+
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+
+ amdgpu_bo_print_info(id, bo, m);
+ }
+ spin_unlock(&file->table_lock);
+ }
+
+ mutex_unlock(&dev->filelist_mutex);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_gem_info);
+
+#endif
+
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_gem_info", 0444, root, adev,
+ &amdgpu_debugfs_gem_info_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
new file mode 100644
index 000000000..f30264782
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GEM_H__
+#define __AMDGPU_GEM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+
+/*
+ * GEM.
+ */
+
+#define AMDGPU_GEM_DOMAIN_MAX 0x3
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+
+/*
+ * GEM objects.
+ */
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+ int alignment, u32 initial_domain,
+ u64 flags, enum ttm_bo_type type,
+ struct dma_resv *resv,
+ struct drm_gem_object **obj, int8_t xcp_id_plus1);
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p);
+
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
new file mode 100644
index 000000000..ef4cb9217
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -0,0 +1,1311 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_rlc.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
+
+/* delay 0.1 second to enable gfx off feature */
+#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+
+#define GFX_OFF_NO_DELAY 0
+
+/*
+ * GPU GFX IP block helpers function.
+ */
+
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue)
+{
+ int bit = 0;
+
+ bit += mec * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue)
+{
+ *queue = bit % adev->gfx.mec.num_queue_per_pipe;
+ *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+}
+
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
+ int xcc_id, int mec, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
+ adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
+}
+
+int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ int bit = 0;
+
+ bit += me * adev->gfx.me.num_pipe_per_me
+ * adev->gfx.me.num_queue_per_pipe;
+ bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
+ int *me, int *pipe, int *queue)
+{
+ *queue = bit % adev->gfx.me.num_queue_per_pipe;
+ *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
+ % adev->gfx.me.num_pipe_per_me;
+ *me = (bit / adev->gfx.me.num_queue_per_pipe)
+ / adev->gfx.me.num_pipe_per_me;
+}
+
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
+ adev->gfx.me.queue_bitmap);
+}
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
+{
+ unsigned int se, sh, cu;
+ const char *p;
+
+ memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+ if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+ return;
+
+ p = amdgpu_disable_cu;
+ for (;;) {
+ char *next;
+ int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+
+ if (ret < 3) {
+ DRM_ERROR("amdgpu: could not parse disable_cu\n");
+ return;
+ }
+
+ if (se < max_se && sh < max_sh && cu < 16) {
+ DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+ mask[se * max_sh + sh] |= 1u << cu;
+ } else {
+ DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+ se, sh, cu);
+ }
+
+ next = strchr(p, ',');
+ if (!next)
+ break;
+ p = next + 1;
+ }
+}
+
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+ return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
+{
+ if (amdgpu_compute_multipipe != -1) {
+ DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+ amdgpu_compute_multipipe);
+ return amdgpu_compute_multipipe == 1;
+ }
+
+ if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ return true;
+
+ /* FIXME: spreading the queues across pipes causes perf regressions
+ * on POLARIS11 compute workloads */
+ if (adev->asic_type == CHIP_POLARIS11)
+ return false;
+
+ return adev->gfx.mec.num_mec > 1;
+}
+
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int queue = ring->queue;
+ int pipe = ring->pipe;
+
+ /* Policy: use pipe1 queue0 as high priority graphics queue if we
+ * have more than one gfx pipe.
+ */
+ if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+ adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+ int me = ring->me;
+ int bit;
+
+ bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+ if (ring == &adev->gfx.gfx_ring[bit])
+ return true;
+ }
+
+ return false;
+}
+
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ /* Policy: use 1st queue as high priority compute queue if we
+ * have more than one compute queue.
+ */
+ if (adev->gfx.num_compute_rings > 1 &&
+ ring == &adev->gfx.compute_ring[0])
+ return true;
+
+ return false;
+}
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, j, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
+ int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe,
+ adev->gfx.num_compute_rings);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+
+ if (multipipe_policy) {
+ /* policy: make queues evenly cross all pipes on MEC1 only
+ * for multiple xcc, just use the original policy for simplicity */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
+ }
+ } else {
+ /* policy: amdgpu owns all queues in the given pipe */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
+ }
+
+ for (j = 0; j < num_xcc; j++) {
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+ bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ }
+}
+
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ if (multipipe_policy) {
+ /* policy: amdgpu owns the first queue per pipe at this stage
+ * will extend to mulitple queues per pipe later */
+ for (i = 0; i < max_queues_per_me; i++) {
+ pipe = i % adev->gfx.me.num_pipe_per_me;
+ queue = (i / adev->gfx.me.num_pipe_per_me) %
+ adev->gfx.me.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
+ adev->gfx.me.queue_bitmap);
+ }
+ } else {
+ for (i = 0; i < max_queues_per_me; ++i)
+ set_bit(i, adev->gfx.me.queue_bitmap);
+ }
+
+ /* update the number of active graphics rings */
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+}
+
+static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring, int xcc_id)
+{
+ int queue_bit;
+ int mec, pipe, queue;
+
+ queue_bit = adev->gfx.mec.num_mec
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+
+ while (--queue_bit >= 0) {
+ if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
+ continue;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+ /*
+ * 1. Using pipes 2/3 from MEC 2 seems cause problems.
+ * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
+ * only can be issued on queue 0.
+ */
+ if ((mec == 1 && pipe > 1) || queue != 0)
+ continue;
+
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ return 0;
+ }
+
+ dev_err(adev->dev, "Failed to find a queue for KIQ\n");
+ return -EINVAL;
+}
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ int r = 0;
+
+ spin_lock_init(&kiq->ring_lock);
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
+ if (r)
+ return r;
+
+ ring->eop_gpu_addr = kiq->eop_gpu_addr;
+ ring->no_scheduler = true;
+ sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
+ r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+
+ return r;
+}
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_fini(ring);
+}
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
+}
+
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+ unsigned int hpd_size, int xcc_id)
+{
+ int r;
+ u32 *hpd;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
+ &kiq->eop_gpu_addr, (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
+ return r;
+ }
+
+ memset(hpd, 0, hpd_size);
+
+ r = amdgpu_bo_reserve(kiq->eop_obj, true);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
+ amdgpu_bo_kunmap(kiq->eop_obj);
+ amdgpu_bo_unreserve(kiq->eop_obj);
+
+ return 0;
+}
+
+/* create MQD for each compute/gfx queue */
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned int mqd_size, int xcc_id)
+{
+ int r, i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+ u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
+ /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+ domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
+
+ /* create MQD for KIQ */
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
+ /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
+ * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
+ * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
+ * KIQ MQD no matter SRIOV or Bare-metal
+ */
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+ if (!kiq->mqd_backup) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ /* create MQD for each KGQ */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ domain, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ ring->mqd_size = mqd_size;
+ /* prepare MQD backup */
+ adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.me.mqd_backup[i]) {
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+ }
+ }
+
+ /* create MQD for each KCQ */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ domain, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ ring->mqd_size = mqd_size;
+ /* prepare MQD backup */
+ adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[j]) {
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring = NULL;
+ int i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ kfree(adev->gfx.me.mqd_backup[i]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ kfree(adev->gfx.mec.mqd_backup[j]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+
+ ring = &kiq->ring;
+ kfree(kiq->mqd_backup);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+}
+
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_compute_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+
+ if (kiq_ring->sched.ready && !adev->job_hang)
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit)
+{
+ int mec, pipe, queue;
+ int set_resource_bit = 0;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+ set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
+
+ return set_resource_bit;
+}
+
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = 0;
+ int r, i, j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
+ return -EINVAL;
+
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
+ continue;
+
+ /* This situation may be hit in the future if a new HW
+ * generation exposes more than 64 queues. If so, the
+ * definition of queue_mask needs updating */
+ if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+ DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ break;
+ }
+
+ queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
+ }
+
+ DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
+ kiq_ring->queue);
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_compute_rings +
+ kiq->pmf->set_resources_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ if (adev->enable_mes)
+ queue_mask = ~0ULL;
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
+ }
+
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ DRM_ERROR("KCQ enable failed\n");
+
+ return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int r, i, j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ spin_lock(&kiq->ring_lock);
+ /* No need to map kcq on the slave */
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j]);
+ }
+ }
+
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ DRM_ERROR("KCQ enable failed\n");
+
+ return r;
+}
+
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ */
+
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ unsigned long delay = GFX_OFF_DELAY_ENABLE;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
+ adev->gfx.gfx_off_req_count--;
+
+ if (adev->gfx.gfx_off_req_count == 0 &&
+ !adev->gfx.gfx_off_state) {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ delay);
+ }
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ adev->gfx.gfx_off_state = false;
+
+ if (adev->gfx.funcs->init_spm_golden) {
+ dev_dbg(adev->dev,
+ "GFXOFF is disabled, re-init SPM golden settings\n");
+ amdgpu_gfx_init_spm_golden(adev);
+ }
+ }
+ }
+
+ adev->gfx.gfx_off_req_count++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
+
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_set_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
+{
+
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_status_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (adev->gfx.cp_ecc_error_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ } else {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_gfx_ras *ras = NULL;
+
+ /* adev->gfx.ras is NULL, which means gfx does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->gfx.ras)
+ return 0;
+
+ ras = adev->gfx.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register gfx ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "gfx");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use gfx default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
+ return adev->gfx.ras->poison_consumption_handler(adev, entry);
+
+ return 0;
+}
+
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ /* TODO ue will trigger an interrupt.
+ *
+ * When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ DRM_ERROR("CP ECC ERROR IRQ\n");
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id))
+{
+ int i;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (err_data) {
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+ }
+
+ for_each_inst(i, xcc_mask)
+ func(adev, ras_error_status, i);
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq, reg_val_offs = 0, value = 0;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->mes.ring.sched.ready)
+ return amdgpu_mes_rreg(adev, reg);
+
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ mb();
+ value = adev->wb.wb[reg_val_offs];
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ dev_err(adev->dev, "failed to read reg:%x\n", reg);
+ return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_wreg);
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (adev->mes.ring.sched.ready) {
+ amdgpu_mes_wreg(adev, reg, v);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_wreg(ring, reg, v);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_write;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_write:
+ dev_err(adev->dev, "failed to write reg:%x\n", reg);
+}
+
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
+{
+ if (amdgpu_num_kcq == -1) {
+ return 8;
+ } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+ dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
+ return 8;
+ }
+ return amdgpu_num_kcq;
+}
+
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ break;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+ return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+ adev->gfx.num_xcc_per_xcp : 1));
+}
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+
+ return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_gfx_partition mode;
+ int ret = 0, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (num_xcc % 2 != 0)
+ return -EINVAL;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+ /*
+ * DPX mode needs AIDs to be in multiple of 2.
+ * Each AID connects 2 XCCs.
+ */
+ if (num_xcc%4)
+ return -EINVAL;
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+ if (num_xcc != 6)
+ return -EINVAL;
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+ if (num_xcc != 8)
+ return -EINVAL;
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ char *supported_partition;
+
+ /* TBD */
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ supported_partition = "SPX, DPX, QPX, CPX";
+ break;
+ case 6:
+ supported_partition = "SPX, TPX, CPX";
+ break;
+ case 4:
+ supported_partition = "SPX, DPX, CPX";
+ break;
+ /* this seems only existing in emulation phase */
+ case 2:
+ supported_partition = "SPX, CPX";
+ break;
+ default:
+ supported_partition = "Not supported";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s\n", supported_partition);
+}
+
+static DEVICE_ATTR(current_compute_partition, 0644,
+ amdgpu_gfx_get_current_compute_partition,
+ amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, 0444,
+ amdgpu_gfx_get_available_compute_partition, NULL);
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+ if (r)
+ return r;
+
+ r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+ device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
new file mode 100644
index 000000000..0ca95c4d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -0,0 +1,566 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GFX_H__
+#define __AMDGPU_GFX_H__
+
+/*
+ * GFX stuff
+ */
+#include "clearstate_defs.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_rlc.h"
+#include "amdgpu_imu.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
+
+/* GFX current status */
+#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
+#define AMDGPU_GFX_SAFE_MODE 0x00000001L
+#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
+#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
+#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
+
+#define AMDGPU_MAX_GC_INSTANCES 8
+#define KGD_MAX_QUEUES 128
+
+#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
+enum amdgpu_gfx_pipe_priority {
+ AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
+ AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
+};
+
+#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
+#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+
+enum amdgpu_gfx_partition {
+ AMDGPU_SPX_PARTITION_MODE = 0,
+ AMDGPU_DPX_PARTITION_MODE = 1,
+ AMDGPU_TPX_PARTITION_MODE = 2,
+ AMDGPU_QPX_PARTITION_MODE = 3,
+ AMDGPU_CPX_PARTITION_MODE = 4,
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+ /* Automatically choose the right mode */
+ AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
+};
+
+#define NUM_XCC(x) hweight16(x)
+
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
+enum amdgpu_gfx_ras_mem_id_type {
+ AMDGPU_GFX_CP_MEM = 0,
+ AMDGPU_GFX_GCEA_MEM,
+ AMDGPU_GFX_GC_CANE_MEM,
+ AMDGPU_GFX_GCUTCL2_MEM,
+ AMDGPU_GFX_GDS_MEM,
+ AMDGPU_GFX_LDS_MEM,
+ AMDGPU_GFX_RLC_MEM,
+ AMDGPU_GFX_SP_MEM,
+ AMDGPU_GFX_SPI_MEM,
+ AMDGPU_GFX_SQC_MEM,
+ AMDGPU_GFX_SQ_MEM,
+ AMDGPU_GFX_TA_MEM,
+ AMDGPU_GFX_TCC_MEM,
+ AMDGPU_GFX_TCA_MEM,
+ AMDGPU_GFX_TCI_MEM,
+ AMDGPU_GFX_TCP_MEM,
+ AMDGPU_GFX_TD_MEM,
+ AMDGPU_GFX_TCX_MEM,
+ AMDGPU_GFX_ATC_L2_MEM,
+ AMDGPU_GFX_UTCL2_MEM,
+ AMDGPU_GFX_VML2_MEM,
+ AMDGPU_GFX_VML2_WALKER_MEM,
+ AMDGPU_GFX_MEM_TYPE_NUM
+};
+
+struct amdgpu_mec {
+ struct amdgpu_bo *hpd_eop_obj;
+ u64 hpd_eop_gpu_addr;
+ struct amdgpu_bo *mec_fw_obj;
+ u64 mec_fw_gpu_addr;
+ struct amdgpu_bo *mec_fw_data_obj;
+ u64 mec_fw_data_gpu_addr;
+
+ u32 num_mec;
+ u32 num_pipe_per_mec;
+ u32 num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+};
+
+struct amdgpu_mec_bitmap {
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+enum amdgpu_unmap_queues_action {
+ PREEMPT_QUEUES = 0,
+ RESET_QUEUES,
+ DISABLE_PROCESS_QUEUES,
+ PREEMPT_QUEUES_NO_UNMAP,
+};
+
+struct kiq_pm4_funcs {
+ /* Support ASIC-specific kiq pm4 packets*/
+ void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask);
+ void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring);
+ void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+ void (*kiq_query_status)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq);
+ void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
+ /* Packet sizes */
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+ int invalidate_tlbs_size;
+};
+
+struct amdgpu_kiq {
+ u64 eop_gpu_addr;
+ struct amdgpu_bo *eop_obj;
+ spinlock_t ring_lock;
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src irq;
+ const struct kiq_pm4_funcs *pmf;
+ void *mqd_backup;
+};
+
+/*
+ * GFX configurations
+ */
+#define AMDGPU_GFX_MAX_SE 4
+#define AMDGPU_GFX_MAX_SH_PER_SE 2
+
+struct amdgpu_rb_config {
+ uint32_t rb_backend_disable;
+ uint32_t user_rb_backend_disable;
+ uint32_t raster_config;
+ uint32_t raster_config_1;
+};
+
+struct gb_addr_config {
+ uint16_t pipe_interleave_size;
+ uint8_t num_pipes;
+ uint8_t max_compress_frags;
+ uint8_t num_banks;
+ uint8_t num_se;
+ uint8_t num_rb_per_se;
+ uint8_t num_pkrs;
+};
+
+struct amdgpu_gfx_config {
+ unsigned max_shader_engines;
+ unsigned max_tile_pipes;
+ unsigned max_cu_per_sh;
+ unsigned max_sh_per_se;
+ unsigned max_backends_per_se;
+ unsigned max_texture_channel_caches;
+ unsigned max_gprs;
+ unsigned max_gs_threads;
+ unsigned max_hw_contexts;
+ unsigned sc_prim_fifo_size_frontend;
+ unsigned sc_prim_fifo_size_backend;
+ unsigned sc_hiz_tile_fifo_size;
+ unsigned sc_earlyz_tile_fifo_size;
+
+ unsigned num_tile_pipes;
+ unsigned backend_enable_mask;
+ unsigned mem_max_burst_length_bytes;
+ unsigned mem_row_size_in_kb;
+ unsigned shader_engine_tile_size;
+ unsigned num_gpus;
+ unsigned multi_gpu_tile_size;
+ unsigned mc_arb_ramcfg;
+ unsigned num_banks;
+ unsigned num_ranks;
+ unsigned gb_addr_config;
+ unsigned num_rbs;
+ unsigned gs_vgt_table_depth;
+ unsigned gs_prim_buffer_depth;
+
+ uint32_t tile_mode_array[32];
+ uint32_t macrotile_mode_array[16];
+
+ struct gb_addr_config gb_addr_config_fields;
+ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+ /* gfx configure feature */
+ uint32_t double_offchip_lds_buf;
+ /* cached value of DB_DEBUG2 */
+ uint32_t db_debug2;
+ /* gfx10 specific config */
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
+ /* Whether texture coordinate truncation is conformant. */
+ bool ta_cntl2_truncate_coord_mode;
+ uint64_t tcc_disabled_mask;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_tcc_size;
+};
+
+struct amdgpu_cu_info {
+ uint32_t simd_per_cu;
+ uint32_t max_waves_per_simd;
+ uint32_t wave_front_size;
+ uint32_t max_scratch_slots_per_cu;
+ uint32_t lds_size;
+
+ /* total active CU number */
+ uint32_t number;
+ uint32_t ao_cu_mask;
+ uint32_t ao_cu_bitmap[4][4];
+ uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
+};
+
+struct amdgpu_gfx_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+ bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
+ int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+ int (*poison_consumption_handler)(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+};
+
+struct amdgpu_gfx_shadow_info {
+ u32 shadow_size;
+ u32 shadow_alignment;
+ u32 csa_size;
+ u32 csa_alignment;
+};
+
+struct amdgpu_gfx_funcs {
+ /* get the gpu clock counter */
+ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+ void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t *dst, int *no_fields);
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread, uint32_t start,
+ uint32_t size, uint32_t *dst);
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start, uint32_t size,
+ uint32_t *dst);
+ void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
+ u32 queue, u32 vmid, u32 xcc_id);
+ void (*init_spm_golden)(struct amdgpu_device *adev);
+ void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+ int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info);
+ enum amdgpu_gfx_partition
+ (*query_partition_mode)(struct amdgpu_device *adev);
+ int (*switch_partition_mode)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp);
+ int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+};
+
+struct sq_work {
+ struct work_struct work;
+ unsigned ih_data;
+};
+
+struct amdgpu_pfp {
+ struct amdgpu_bo *pfp_fw_obj;
+ uint64_t pfp_fw_gpu_addr;
+ uint32_t *pfp_fw_ptr;
+
+ struct amdgpu_bo *pfp_fw_data_obj;
+ uint64_t pfp_fw_data_gpu_addr;
+ uint32_t *pfp_fw_data_ptr;
+};
+
+struct amdgpu_ce {
+ struct amdgpu_bo *ce_fw_obj;
+ uint64_t ce_fw_gpu_addr;
+ uint32_t *ce_fw_ptr;
+};
+
+struct amdgpu_me {
+ struct amdgpu_bo *me_fw_obj;
+ uint64_t me_fw_gpu_addr;
+ uint32_t *me_fw_ptr;
+
+ struct amdgpu_bo *me_fw_data_obj;
+ uint64_t me_fw_data_gpu_addr;
+ uint32_t *me_fw_data_ptr;
+
+ uint32_t num_me;
+ uint32_t num_pipe_per_me;
+ uint32_t num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+};
+
+struct amdgpu_gfx {
+ struct mutex gpu_clock_mutex;
+ struct amdgpu_gfx_config config;
+ struct amdgpu_rlc rlc;
+ struct amdgpu_pfp pfp;
+ struct amdgpu_ce ce;
+ struct amdgpu_me me;
+ struct amdgpu_mec mec;
+ struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_imu imu;
+ bool rs64_enable; /* firmware format */
+ const struct firmware *me_fw; /* ME firmware */
+ uint32_t me_fw_version;
+ const struct firmware *pfp_fw; /* PFP firmware */
+ uint32_t pfp_fw_version;
+ const struct firmware *ce_fw; /* CE firmware */
+ uint32_t ce_fw_version;
+ const struct firmware *rlc_fw; /* RLC firmware */
+ uint32_t rlc_fw_version;
+ const struct firmware *mec_fw; /* MEC firmware */
+ uint32_t mec_fw_version;
+ const struct firmware *mec2_fw; /* MEC2 firmware */
+ uint32_t mec2_fw_version;
+ const struct firmware *imu_fw; /* IMU firmware */
+ uint32_t imu_fw_version;
+ uint32_t me_feature_version;
+ uint32_t ce_feature_version;
+ uint32_t pfp_feature_version;
+ uint32_t rlc_feature_version;
+ uint32_t rlc_srlc_fw_version;
+ uint32_t rlc_srlc_feature_version;
+ uint32_t rlc_srlg_fw_version;
+ uint32_t rlc_srlg_feature_version;
+ uint32_t rlc_srls_fw_version;
+ uint32_t rlc_srls_feature_version;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t mec_feature_version;
+ uint32_t mec2_feature_version;
+ bool mec_fw_write_wait;
+ bool me_fw_write_wait;
+ bool cp_fw_write_wait;
+ struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ unsigned num_gfx_rings;
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+ unsigned num_compute_rings;
+ struct amdgpu_irq_src eop_irq;
+ struct amdgpu_irq_src priv_reg_irq;
+ struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src cp_ecc_error_irq;
+ struct amdgpu_irq_src sq_irq;
+ struct amdgpu_irq_src rlc_gc_fed_irq;
+ struct sq_work sq_work;
+
+ /* gfx status */
+ uint32_t gfx_current_status;
+ /* ce ram size*/
+ unsigned ce_ram_size;
+ struct amdgpu_cu_info cu_info;
+ const struct amdgpu_gfx_funcs *funcs;
+
+ /* reset mask */
+ uint32_t grbm_soft_reset;
+ uint32_t srbm_soft_reset;
+
+ /* gfx off */
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex; /* mutex to change gfxoff state */
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */
+ uint32_t gfx_off_residency; /* last logged residency */
+ uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */
+
+ /* pipe reservation */
+ struct mutex pipe_reserve_mutex;
+ DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /*ras */
+ struct ras_common_if *ras_if;
+ struct amdgpu_gfx_ras *ras;
+
+ bool is_poweron;
+
+ struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+ struct amdgpu_ring_mux muxer;
+
+ bool cp_gfx_shadow; /* for gfx11 */
+
+ uint16_t xcc_mask;
+ uint32_t num_xcc_per_xcp;
+ struct mutex partition_mutex;
+ bool mcbp; /* mid command buffer preemption */
+};
+
+struct amdgpu_gfx_ras_reg_entry {
+ struct amdgpu_ras_err_status_reg_entry reg_entry;
+ enum amdgpu_gfx_ras_mem_id_type mem_id_type;
+ uint32_t se_num;
+};
+
+struct amdgpu_gfx_ras_mem_id_entry {
+ const struct amdgpu_ras_memory_id_entry *mem_id_ent;
+ uint32_t size;
+};
+
+#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
+
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
+#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
+
+/**
+ * amdgpu_gfx_create_bitmask - create a bitmask
+ *
+ * @bit_width: length of the mask
+ *
+ * create a variable length bit mask.
+ * Returns the bitmask.
+ */
+static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
+{
+ return (u32)((1ULL << bit_width) - 1);
+}
+
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
+ unsigned max_sh);
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq, int xcc_id);
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+ unsigned hpd_size, int xcc_id);
+
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned mqd_size, int xcc_id);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
+
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue);
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
+ int mec, int pipe, int queue);
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
+ int pipe, int queue);
+void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
+ int *me, int *pipe, int *queue);
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
+ int pipe, int queue);
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id));
+
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return "SPX";
+ case AMDGPU_DPX_PARTITION_MODE:
+ return "DPX";
+ case AMDGPU_TPX_PARTITION_MODE:
+ return "TPX";
+ case AMDGPU_QPX_PARTITION_MODE:
+ return "QPX";
+ case AMDGPU_CPX_PARTITION_MODE:
+ return "CPX";
+ default:
+ return "UNKNOWN";
+ }
+
+ return "UNKNOWN";
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
new file mode 100644
index 000000000..c7b44aeb6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GFXHUB_H__
+#define __AMDGPU_GFXHUB_H__
+
+struct amdgpu_gfxhub_funcs {
+ u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
+ void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
+ int (*gart_enable)(struct amdgpu_device *adev);
+
+ void (*gart_disable)(struct amdgpu_device *adev);
+ void (*set_fault_enable_default)(struct amdgpu_device *adev, bool value);
+ void (*init)(struct amdgpu_device *adev);
+ int (*get_xgmi_info)(struct amdgpu_device *adev);
+ void (*utcl2_harvest)(struct amdgpu_device *adev);
+ void (*mode2_save_regs)(struct amdgpu_device *adev);
+ void (*mode2_restore_regs)(struct amdgpu_device *adev);
+ void (*halt)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_gfxhub {
+ const struct amdgpu_gfxhub_funcs *funcs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
new file mode 100644
index 000000000..d78bd9732
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
+#include "amdgpu.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+/**
+ * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for pdb0 and map it for CPU access
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
+{
+ int r;
+ struct amdgpu_bo_param bp;
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = PAGE_ALIGN((npdes + 1) * 8);
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
+ if (unlikely(r != 0))
+ goto bo_reserve_failure;
+
+ r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto bo_pin_failure;
+ r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
+ if (r)
+ goto bo_kmap_failure;
+
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+ return 0;
+
+bo_kmap_failure:
+ amdgpu_bo_unpin(adev->gmc.pdb0_bo);
+bo_pin_failure:
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+bo_reserve_failure:
+ amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+ return r;
+}
+
+/**
+ * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
+ *
+ * @bo: the BO to get the PDE for
+ * @level: the level in the PD hirarchy
+ * @addr: resulting addr
+ * @flags: resulting flags
+ *
+ * Get the address and flags to be used for a PDE (Page Directory Entry).
+ */
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_TT:
+ *addr = bo->tbo.ttm->dma_address[0];
+ break;
+ case TTM_PL_VRAM:
+ *addr = amdgpu_bo_gpu_offset(bo);
+ break;
+ default:
+ *addr = 0;
+ break;
+ }
+ *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
+ amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
+}
+
+/*
+ * amdgpu_gmc_pd_addr - return the address of the root directory
+ */
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t pd_addr;
+
+ /* TODO: move that into ASIC specific code */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ uint64_t flags = AMDGPU_PTE_VALID;
+
+ amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
+ pd_addr |= flags;
+ } else {
+ pd_addr = amdgpu_bo_gpu_offset(bo);
+ }
+ return pd_addr;
+}
+
+/**
+ * amdgpu_gmc_set_pte_pde - update the page tables using CPU
+ *
+ * @adev: amdgpu_device pointer
+ * @cpu_pt_addr: cpu address of the page table
+ * @gpu_page_idx: entry in the page table to update
+ * @addr: dst addr to write into pte/pde
+ * @flags: access flags
+ *
+ * Update the page tables using CPU.
+ */
+int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
+{
+ void __iomem *ptr = (void *)cpu_pt_addr;
+ uint64_t value;
+
+ /*
+ * The following is for PTE only. GART does not have PDEs.
+ */
+ value = addr & 0x0000FFFFFFFFF000ULL;
+ value |= flags;
+ writeq(value, ptr + (gpu_page_idx * 8));
+
+ return 0;
+}
+
+/**
+ * amdgpu_gmc_agp_addr - return the address in the AGP address space
+ *
+ * @bo: TTM BO which needs the address, must be in GTT domain
+ *
+ * Tries to figure out how to access the BO through the AGP aperture. Returns
+ * AMDGPU_BO_INVALID_OFFSET if that is not possible.
+ */
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+
+ if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ return adev->gmc.agp_start + bo->ttm->dma_address[0];
+}
+
+/**
+ * amdgpu_gmc_vram_location - try to find VRAM location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ * @base: base address at which to put VRAM
+ *
+ * Function will try to place VRAM at base address provided
+ * as parameter.
+ */
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base)
+{
+ uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
+ uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
+
+ mc->vram_start = base;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ if (limit < mc->real_vram_size)
+ mc->real_vram_size = limit;
+
+ if (vis_limit && vis_limit < mc->visible_vram_size)
+ mc->visible_vram_size = vis_limit;
+
+ if (mc->real_vram_size < mc->visible_vram_size)
+ mc->visible_vram_size = mc->real_vram_size;
+
+ if (mc->xgmi.num_physical_nodes == 0) {
+ mc->fb_start = mc->vram_start;
+ mc->fb_end = mc->vram_end;
+ }
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+}
+
+/** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * This function is only used if use GART for FB translation. In such
+ * case, we use sysvm aperture (vmid0 page tables) for both vram
+ * and gart (aka system memory) access.
+ *
+ * GPUVM (and our organization of vmid0 page tables) require sysvm
+ * aperture to be placed at a location aligned with 8 times of native
+ * page size. For example, if vm_context0_cntl.page_table_block_size
+ * is 12, then native page size is 8G (2M*2^12), sysvm should start
+ * with a 64G aligned address. For simplicity, we just put sysvm at
+ * address 0. So vram start at address 0 and gart is right after vram.
+ */
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ u64 hive_vram_start = 0;
+ u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
+ mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+ mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+ mc->gart_start = hive_vram_end + 1;
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
+/**
+ * amdgpu_gmc_gart_location - try to find GART location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will place try to place GART before or after VRAM.
+ * If GART size is bigger than space left then we ajust GART size.
+ * Thus function will never fails.
+ */
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ const uint64_t four_gb = 0x100000000ULL;
+ u64 size_af, size_bf;
+ /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
+ u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
+
+ /* VCE doesn't like it when BOs cross a 4GB segment, so align
+ * the GART base on a 4GB boundary as well.
+ */
+ size_bf = mc->fb_start;
+ size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
+
+ if (mc->gart_size > max(size_bf, size_af)) {
+ dev_warn(adev->dev, "limiting GART\n");
+ mc->gart_size = max(size_bf, size_af);
+ }
+
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+
+ mc->gart_start &= ~(four_gb - 1);
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
+/**
+ * amdgpu_gmc_agp_location - try to find AGP location
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will place try to find a place for the AGP BAR in the MC address
+ * space.
+ *
+ * AGP BAR will be assigned the largest available hole in the address space.
+ * Should be called after VRAM and GART locations are setup.
+ */
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ const uint64_t sixteen_gb = 1ULL << 34;
+ const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
+ u64 size_af, size_bf;
+
+ if (amdgpu_sriov_vf(adev)) {
+ mc->agp_start = 0xffffffffffff;
+ mc->agp_end = 0x0;
+ mc->agp_size = 0;
+
+ return;
+ }
+
+ if (mc->fb_start > mc->gart_start) {
+ size_bf = (mc->fb_start & sixteen_gb_mask) -
+ ALIGN(mc->gart_end + 1, sixteen_gb);
+ size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
+ } else {
+ size_bf = mc->fb_start & sixteen_gb_mask;
+ size_af = (mc->gart_start & sixteen_gb_mask) -
+ ALIGN(mc->fb_end + 1, sixteen_gb);
+ }
+
+ if (size_bf > size_af) {
+ mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
+ mc->agp_size = size_bf;
+ } else {
+ mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
+ mc->agp_size = size_af;
+ }
+
+ mc->agp_end = mc->agp_start + mc->agp_size - 1;
+ dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
+ mc->agp_size >> 20, mc->agp_start, mc->agp_end);
+}
+
+/**
+ * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+ return addr << 4 | pasid;
+}
+
+/**
+ * amdgpu_gmc_filter_faults - filter VM faults
+ *
+ * @adev: amdgpu device structure
+ * @ih: interrupt ring that the fault received from
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ * @timestamp: timestamp of the fault
+ *
+ * Returns:
+ * True if the fault was filtered and should not be processed further.
+ * False if the fault is a new one and needs to be handled.
+ */
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
+ uint16_t pasid, uint64_t timestamp)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_gmc_fault *fault;
+ uint32_t hash;
+
+ /* Stale retry fault if timestamp goes backward */
+ if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
+ return true;
+
+ /* If we don't have space left in the ring buffer return immediately */
+ stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
+ AMDGPU_GMC_FAULT_TIMEOUT;
+ if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
+ return true;
+
+ /* Try to find the fault in the hash */
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ while (fault->timestamp >= stamp) {
+ uint64_t tmp;
+
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * if we get a fault which is already present in
+ * the fault_ring and the timestamp of
+ * the fault is after the expired timestamp,
+ * then this is a new fault that needs to be added
+ * into the fault ring.
+ */
+ if (fault->timestamp_expiry != 0 &&
+ amdgpu_ih_ts_after(fault->timestamp_expiry,
+ timestamp))
+ break;
+ else
+ return true;
+ }
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+
+ /* Check if the entry was reused */
+ if (fault->timestamp >= tmp)
+ break;
+ }
+
+ /* Add the fault to the ring */
+ fault = &gmc->fault_ring[gmc->last_fault];
+ atomic64_set(&fault->key, key);
+ fault->timestamp = timestamp;
+
+ /* And update the hash */
+ fault->next = gmc->fault_hash[hash].idx;
+ gmc->fault_hash[hash].idx = gmc->last_fault++;
+ return false;
+}
+
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_ih_ring *ih;
+ struct amdgpu_gmc_fault *fault;
+ uint32_t last_wptr;
+ uint64_t last_ts;
+ uint32_t hash;
+ uint64_t tmp;
+
+ ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1;
+ /* Get the WPTR of the last entry in IH ring */
+ last_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ /* Get the timetamp of the last entry in IH ring */
+ last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
+
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ do {
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * Update the timestamp when this fault
+ * expired.
+ */
+ fault->timestamp_expiry = last_ts;
+ break;
+ }
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+ } while (fault->timestamp < tmp);
+}
+
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* umc ras block */
+ r = amdgpu_umc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* mmhub ras block */
+ r = amdgpu_mmhub_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* hdp ras block */
+ r = amdgpu_hdp_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* mca.x ras block */
+ r = amdgpu_mca_mp0_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_mca_mp1_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_mca_mpio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* xgmi ras block */
+ r = amdgpu_xgmi_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
+{
+
+}
+
+ /*
+ * The latest engine allocation on gfx9/10 is:
+ * Engine 2, 3: firmware
+ * Engine 0, 1, 4~16: amdgpu ring,
+ * subject to change when ring number changes
+ * Engine 17: Gart flushes
+ */
+#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
+
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
+ unsigned i;
+ unsigned vmhub, inv_eng;
+
+ /* init the vm inv eng for all vmhubs */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
+ /* reserve engine 5 for firmware */
+ if (adev->enable_mes)
+ vm_inv_engs[i] &= ~(1 << 5);
+ }
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ ring = adev->rings[i];
+ vmhub = ring->vm_hub;
+
+ if (ring == &adev->mes.ring)
+ continue;
+
+ inv_eng = ffs(vm_inv_engs[vmhub]);
+ if (!inv_eng) {
+ dev_err(adev->dev, "no VM inv eng for ring %s\n",
+ ring->name);
+ return -EINVAL;
+ }
+
+ ring->vm_inv_eng = inv_eng - 1;
+ vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+
+ dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
+ ring->name, ring->vm_inv_eng, ring->vm_hub);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
+ * @adev: amdgpu_device pointer
+ *
+ * Check and set if an the device @adev supports Trusted Memory
+ * Zones (TMZ).
+ */
+void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ /* RAVEN */
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ /* RENOIR looks like RAVEN */
+ case IP_VERSION(9, 3, 0):
+ /* GC 10.3.7 */
+ case IP_VERSION(10, 3, 7):
+ /* GC 11.0.1 */
+ case IP_VERSION(11, 0, 1):
+ if (amdgpu_tmz == 0) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled\n");
+ }
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ /* VANGOGH */
+ case IP_VERSION(10, 3, 1):
+ /* YELLOW_CARP*/
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(11, 0, 4):
+ /* Don't enable it by default yet.
+ */
+ if (amdgpu_tmz < 1) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
+ }
+ break;
+ default:
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature not supported\n");
+ break;
+ }
+}
+
+/**
+ * amdgpu_gmc_noretry_set -- set per asic noretry defaults
+ * @adev: amdgpu_device pointer
+ *
+ * Set a per asic default for the no-retry parameter.
+ *
+ */
+void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint32_t gc_ver = adev->ip_versions[GC_HWIP][0];
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 3, 0) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
+
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
+}
+
+void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
+ bool enable)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, i;
+
+ hub = &adev->vmhub[hub_type];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + hub->ctx_distance * i;
+
+ tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
+ RREG32_SOC15_IP(GC, reg) :
+ RREG32_SOC15_IP(MMHUB, reg);
+
+ if (enable)
+ tmp |= hub->vm_cntx_cntl_vm_fault;
+ else
+ tmp &= ~hub->vm_cntx_cntl_vm_fault;
+
+ (hub_type == AMDGPU_GFXHUB(0)) ?
+ WREG32_SOC15_IP(GC, reg, tmp) :
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ }
+}
+
+void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
+{
+ unsigned size;
+
+ /*
+ * Some ASICs need to reserve a region of video memory to avoid access
+ * from driver
+ */
+ adev->mman.stolen_reserved_offset = 0;
+ adev->mman.stolen_reserved_size = 0;
+
+ /*
+ * TODO:
+ * Currently there is a bug where some memory client outside
+ * of the driver writes to first 8M of VRAM on S3 resume,
+ * this overrides GART which by default gets placed in first 8M and
+ * causes VM_FAULTS once GTT is accessed.
+ * Keep the stolen memory reservation until the while this is not solved.
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->mman.keep_stolen_vga_memory = true;
+ /*
+ * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
+ */
+#ifdef CONFIG_X86
+ if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
+ adev->mman.stolen_reserved_offset = 0x500000;
+ adev->mman.stolen_reserved_size = 0x200000;
+ }
+#endif
+ break;
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ adev->mman.keep_stolen_vga_memory = true;
+ break;
+ case CHIP_YELLOW_CARP:
+ if (amdgpu_discovery == 0) {
+ adev->mman.stolen_reserved_offset = 0x1ffb0000;
+ adev->mman.stolen_reserved_size = 64 * PAGE_SIZE;
+ }
+ break;
+ default:
+ adev->mman.keep_stolen_vga_memory = false;
+ break;
+ }
+
+ if (amdgpu_sriov_vf(adev) ||
+ !amdgpu_device_has_display_hardware(adev)) {
+ size = 0;
+ } else {
+ size = amdgpu_gmc_get_vbios_fb_size(adev);
+
+ if (adev->mman.keep_stolen_vga_memory)
+ size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
+ }
+
+ /* set to 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ size = 0;
+
+ if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
+ adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
+ } else {
+ adev->mman.stolen_vga_size = size;
+ adev->mman.stolen_extended_size = 0;
+ }
+}
+
+/**
+ * amdgpu_gmc_init_pdb0 - initialize PDB0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This function is only used when GART page table is used
+ * for FB address translatioin. In such a case, we construct
+ * a 2-level system VM page table: PDB0->PTB, to cover both
+ * VRAM of the hive and system memory.
+ *
+ * PDB0 is static, initialized once on driver initialization.
+ * The first n entries of PDB0 are used as PTE by setting
+ * P bit to 1, pointing to VRAM. The n+1'th entry points
+ * to a big PTB covering system memory.
+ *
+ */
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
+{
+ int i;
+ uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
+ /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
+ */
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
+ u64 vram_addr = adev->vm_manager.vram_base_offset -
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ u64 vram_end = vram_addr + vram_size;
+ u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ flags |= AMDGPU_PTE_WRITEABLE;
+ flags |= AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
+ flags |= AMDGPU_PDE_PTE;
+
+ /* The first n PDE0 entries are used as PTE,
+ * pointing to vram
+ */
+ for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+
+ /* The n+1'th PDE0 entry points to a huge
+ * PTB who has more than 512 entries each
+ * pointing to a 4K system page
+ */
+ flags = AMDGPU_PTE_VALID;
+ flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+ /* Requires gart_ptb_gpu_pa to be 4K aligned */
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
+ * address
+ *
+ * @adev: amdgpu_device pointer
+ * @mc_addr: MC address of buffer
+ */
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
+{
+ return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
+}
+
+/**
+ * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
+ * GPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
+}
+
+/**
+ * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
+ * from CPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
+}
+
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo *vram_bo = NULL;
+ uint64_t vram_gpu = 0;
+ void *vram_ptr = NULL;
+
+ int ret, size = 0x100000;
+ uint8_t cptr[10];
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &vram_bo,
+ &vram_gpu,
+ &vram_ptr);
+ if (ret)
+ return ret;
+
+ memset(vram_ptr, 0x86, size);
+ memset(cptr, 0x86, 10);
+
+ /**
+ * Check the start, the mid, and the end of the memory if the content of
+ * each byte is the pattern "0x86". If yes, we suppose the vram bo is
+ * workable.
+ *
+ * Note: If check the each byte of whole 1M bo, it will cost too many
+ * seconds, so here, we just pick up three parts for emulation.
+ */
+ ret = memcmp(vram_ptr, cptr, 10);
+ if (ret)
+ return ret;
+
+ ret = memcmp(vram_ptr + (size / 2), cptr, 10);
+ if (ret)
+ return ret;
+
+ ret = memcmp(vram_ptr + size - 10, cptr, 10);
+ if (ret)
+ return ret;
+
+ amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
+ &vram_ptr);
+
+ return 0;
+}
+
+static ssize_t current_memory_partition_show(
+ struct device *dev, struct device_attribute *addr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ switch (mode) {
+ case AMDGPU_NPS1_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS1\n");
+ case AMDGPU_NPS2_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS2\n");
+ case AMDGPU_NPS3_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS3\n");
+ case AMDGPU_NPS4_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS4\n");
+ case AMDGPU_NPS6_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS6\n");
+ case AMDGPU_NPS8_PARTITION_MODE:
+ return sysfs_emit(buf, "NPS8\n");
+ default:
+ return sysfs_emit(buf, "UNKNOWN\n");
+ }
+
+ return sysfs_emit(buf, "UNKNOWN\n");
+}
+
+static DEVICE_ATTR_RO(current_memory_partition);
+
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return 0;
+
+ return device_create_file(adev->dev,
+ &dev_attr_current_memory_partition);
+}
+
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
new file mode 100644
index 000000000..fdc25cd55
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -0,0 +1,422 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef __AMDGPU_GMC_H__
+#define __AMDGPU_GMC_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
+
+/* VA hole for 48bit addresses on Vega10 */
+#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
+#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
+
+/*
+ * Hardware is programmed as if the hole doesn't exists with start and end
+ * address values.
+ *
+ * This mask is used to remove the upper 16bits of the VA and so come up with
+ * the linear addr value.
+ */
+#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
+
+/*
+ * Ring size as power of two for the log of recent faults.
+ */
+#define AMDGPU_GMC_FAULT_RING_ORDER 8
+#define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER)
+
+/*
+ * Hash size as power of two for the log of recent faults
+ */
+#define AMDGPU_GMC_FAULT_HASH_ORDER 8
+#define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER)
+
+/*
+ * Number of IH timestamp ticks until a fault is considered handled
+ */
+#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+
+struct firmware;
+
+enum amdgpu_memory_partition {
+ UNKNOWN_MEMORY_PARTITION_MODE = 0,
+ AMDGPU_NPS1_PARTITION_MODE = 1,
+ AMDGPU_NPS2_PARTITION_MODE = 2,
+ AMDGPU_NPS3_PARTITION_MODE = 3,
+ AMDGPU_NPS4_PARTITION_MODE = 4,
+ AMDGPU_NPS6_PARTITION_MODE = 6,
+ AMDGPU_NPS8_PARTITION_MODE = 8,
+};
+
+/*
+ * GMC page fault information
+ */
+struct amdgpu_gmc_fault {
+ uint64_t timestamp:48;
+ uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
+ atomic64_t key;
+ uint64_t timestamp_expiry:48;
+};
+
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub_funcs {
+ void (*print_l2_protection_fault_status)(struct amdgpu_device *adev,
+ uint32_t status);
+ uint32_t (*get_invalidate_req)(unsigned int vmid, uint32_t flush_type);
+};
+
+struct amdgpu_vmhub {
+ uint32_t ctx0_ptb_addr_lo32;
+ uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_sem;
+ uint32_t vm_inv_eng0_req;
+ uint32_t vm_inv_eng0_ack;
+ uint32_t vm_context0_cntl;
+ uint32_t vm_l2_pro_fault_status;
+ uint32_t vm_l2_pro_fault_cntl;
+
+ /*
+ * store the register distances between two continuous context domain
+ * and invalidation engine.
+ */
+ uint32_t ctx_distance;
+ uint32_t ctx_addr_distance; /* include LO32/HI32 */
+ uint32_t eng_distance;
+ uint32_t eng_addr_distance; /* include LO32/HI32 */
+
+ uint32_t vm_cntx_cntl;
+ uint32_t vm_cntx_cntl_vm_fault;
+ uint32_t vm_l2_bank_select_reserved_cid2;
+
+ uint32_t vm_contexts_disable;
+
+ const struct amdgpu_vmhub_funcs *vmhub_funcs;
+};
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct amdgpu_gmc_funcs {
+ /* flush the vm tlb via mmio */
+ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+ /* flush the vm tlb via pasid */
+ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+ /* flush the vm tlb via ring */
+ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr);
+ /* Change the VMID -> PASID mapping */
+ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid);
+ /* enable/disable PRT support */
+ void (*set_prt)(struct amdgpu_device *adev, bool enable);
+ /* map mtype to hardware flags */
+ uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
+ /* get the pde for a given mc addr */
+ void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+ u64 *dst, u64 *flags);
+ /* get the pte flags to use for a BO VA mapping */
+ void (*get_vm_pte)(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags);
+ /* override per-page pte flags */
+ void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags);
+ /* get the amount of memory used by the vbios for pre-OS console */
+ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+
+ enum amdgpu_memory_partition (*query_mem_partition_mode)(
+ struct amdgpu_device *adev);
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ bool pending_reset;
+ struct amdgpu_xgmi_ras *ras;
+};
+
+struct amdgpu_mem_partition_info {
+ union {
+ struct {
+ uint32_t fpfn;
+ uint32_t lpfn;
+ } range;
+ struct {
+ int node;
+ } numa;
+ };
+ uint64_t size;
+};
+
+#define INVALID_PFN -1
+
+struct amdgpu_gmc {
+ /* FB's physical address in MMIO space (for CPU to
+ * map FB). This is different compared to the agp/
+ * gart/vram_start/end field as the later is from
+ * GPU's view and aper_base is from CPU's view.
+ */
+ resource_size_t aper_size;
+ resource_size_t aper_base;
+ /* for some chips with <= 32MB we need to lie
+ * about vram size near mc fb location */
+ u64 mc_vram_size;
+ u64 visible_vram_size;
+ /* AGP aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place AGP by setting MC_VM_AGP_BOT/TOP registers
+ * Under VMID0, logical address == MC address. AGP
+ * aperture maps to physical bus or IOVA addressed.
+ * AGP aperture is used to simulate FB in ZFB case.
+ * AGP aperture is also used for page table in system
+ * memory (mainly for APU).
+ *
+ */
+ u64 agp_size;
+ u64 agp_start;
+ u64 agp_end;
+ /* GART aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
+ * registers
+ * Under VMID0, logical address inside GART aperture will
+ * be translated through gpuvm gart page table to access
+ * paged system memory
+ */
+ u64 gart_size;
+ u64 gart_start;
+ u64 gart_end;
+ /* Frame buffer aperture of this GPU device. Different from
+ * fb_start (see below), this only covers the local GPU device.
+ * If driver uses FB aperture to access FB, driver get fb_start from
+ * MC_VM_FB_LOCATION_BASE (set by vbios) and calculate vram_start
+ * of this local device by adding an offset inside the XGMI hive.
+ * If driver uses GART table for VMID0 FB access, driver finds a hole in
+ * VMID0's virtual address space to place the SYSVM aperture inside
+ * which the first part is vram and the second part is gart (covering
+ * system ram).
+ */
+ u64 vram_start;
+ u64 vram_end;
+ /* FB region , it's same as local vram region in single GPU, in XGMI
+ * configuration, this region covers all GPUs in the same hive ,
+ * each GPU in the hive has the same view of this FB region .
+ * GPU0's vram starts at offset (0 * segment size) ,
+ * GPU1 starts at offset (1 * segment size), etc.
+ */
+ u64 fb_start;
+ u64 fb_end;
+ unsigned vram_width;
+ u64 real_vram_size;
+ int vram_mtrr;
+ u64 mc_mask;
+ const struct firmware *fw; /* MC firmware */
+ uint32_t fw_version;
+ struct amdgpu_irq_src vm_fault;
+ uint32_t vram_type;
+ uint8_t vram_vendor;
+ uint32_t srbm_soft_reset;
+ bool prt_warning;
+ uint32_t sdpif_register;
+ /* apertures */
+ u64 shared_aperture_start;
+ u64 shared_aperture_end;
+ u64 private_aperture_start;
+ u64 private_aperture_end;
+ /* protects concurrent invalidation */
+ spinlock_t invalidate_lock;
+ bool translate_further;
+ struct kfd_vm_fault_info *vm_fault_info;
+ atomic_t vm_fault_info_updated;
+
+ struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE];
+ struct {
+ uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER;
+ } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE];
+ uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
+
+ bool tmz_enabled;
+ bool is_app_apu;
+
+ struct amdgpu_mem_partition_info *mem_partitions;
+ uint8_t num_mem_partitions;
+ const struct amdgpu_gmc_funcs *gmc_funcs;
+
+ struct amdgpu_xgmi xgmi;
+ struct amdgpu_irq_src ecc_irq;
+ int noretry;
+
+ uint32_t vmid0_page_table_block_size;
+ uint32_t vmid0_page_table_depth;
+ struct amdgpu_bo *pdb0_bo;
+ /* CPU kmapped address of pdb0*/
+ void *ptr_pdb0;
+
+ /* MALL size */
+ u64 mall_size;
+ uint32_t m_half_use;
+
+ /* number of UMC instances */
+ int num_umc;
+ /* mode2 save restore */
+ u64 VM_L2_CNTL;
+ u64 VM_L2_CNTL2;
+ u64 VM_DUMMY_PAGE_FAULT_CNTL;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32;
+ u64 VM_L2_PROTECTION_FAULT_CNTL;
+ u64 VM_L2_PROTECTION_FAULT_CNTL2;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL3;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL4;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_LO32;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_HI32;
+ u64 VM_DEBUG;
+ u64 VM_L2_MM_GROUP_RT_CLASSES;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID2;
+ u64 VM_L2_CACHE_PARITY_CNTL;
+ u64 VM_L2_IH_LOG_CNTL;
+ u64 VM_CONTEXT_CNTL[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16];
+ u64 MC_VM_MX_L1_TLB_CNTL;
+
+ u64 noretry_flags;
+};
+
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
+ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+ ((adev), (pasid), (type), (allhub), (inst)))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
+ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
+ ((adev), (vm), (addr), (pte_flags))
+#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+
+/**
+ * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * True if full VRAM is visible through the BAR
+ */
+static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
+{
+ WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
+
+ return (gmc->real_vram_size == gmc->visible_vram_size);
+}
+
+/**
+ * amdgpu_gmc_sign_extend - sign extend the given gmc address
+ *
+ * @addr: address to extend
+ */
+static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
+{
+ if (addr >= AMDGPU_GMC_HOLE_START)
+ addr |= AMDGPU_GMC_HOLE_END;
+
+ return addr;
+}
+
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags);
+int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags);
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc);
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base);
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
+ uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid);
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+
+extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
+extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
+
+extern void
+amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
+ bool enable);
+
+void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
+
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
new file mode 100644
index 000000000..44367f033
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu.h"
+
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct amdgpu_gtt_mgr, manager);
+}
+
+/**
+ * DOC: mem_info_gtt_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total size of
+ * the GTT.
+ * The file mem_info_gtt_total is used for this, and returns the total size of
+ * the GTT block, in bytes
+ */
+static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man;
+
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ return sysfs_emit(buf, "%llu\n", man->size);
+}
+
+/**
+ * DOC: mem_info_gtt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total amount of
+ * used GTT.
+ * The file mem_info_gtt_used is used for this, and returns the current used
+ * size of the GTT block, in bytes
+ */
+static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
+ amdgpu_mem_info_gtt_total_show, NULL);
+static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
+ amdgpu_mem_info_gtt_used_show, NULL);
+
+static struct attribute *amdgpu_gtt_mgr_attributes[] = {
+ &dev_attr_mem_info_gtt_total.attr,
+ &dev_attr_mem_info_gtt_used.attr,
+ NULL
+};
+
+const struct attribute_group amdgpu_gtt_mgr_attr_group = {
+ .attrs = amdgpu_gtt_mgr_attributes
+};
+
+/**
+ * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
+ *
+ * @res: the mem object to check
+ *
+ * Check if a mem object has already address space allocated.
+ */
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
+{
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+
+ return drm_mm_node_allocated(&node->mm_nodes[0]);
+}
+
+/**
+ * amdgpu_gtt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Dummy, allocate the node but no space for it yet.
+ */
+static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+ uint32_t num_pages = PFN_UP(tbo->base.size);
+ struct ttm_range_mgr_node *node;
+ int r;
+
+ node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &node->base);
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > man->size) {
+ r = -ENOSPC;
+ goto err_free;
+ }
+
+ if (place->lpfn) {
+ spin_lock(&mgr->lock);
+ r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
+ num_pages, tbo->page_alignment,
+ 0, place->fpfn, place->lpfn,
+ DRM_MM_INSERT_BEST);
+ spin_unlock(&mgr->lock);
+ if (unlikely(r))
+ goto err_free;
+
+ node->base.start = node->mm_nodes[0].start;
+ } else {
+ node->mm_nodes[0].start = 0;
+ node->mm_nodes[0].size = PFN_UP(node->base.size);
+ node->base.start = AMDGPU_BO_INVALID_OFFSET;
+ }
+
+ *res = &node->base;
+ return 0;
+
+err_free:
+ ttm_resource_fini(man, &node->base);
+ kfree(node);
+ return r;
+}
+
+/**
+ * amdgpu_gtt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated GTT again.
+ */
+static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+
+ spin_lock(&mgr->lock);
+ if (drm_mm_node_allocated(&node->mm_nodes[0]))
+ drm_mm_remove_node(&node->mm_nodes[0]);
+ spin_unlock(&mgr->lock);
+
+ ttm_resource_fini(man, res);
+ kfree(node);
+}
+
+/**
+ * amdgpu_gtt_mgr_recover - re-init gart
+ *
+ * @mgr: amdgpu_gtt_mgr pointer
+ *
+ * Re-init the gart for each known BO in the GTT.
+ */
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
+{
+ struct ttm_range_mgr_node *node;
+ struct drm_mm_node *mm_node;
+ struct amdgpu_device *adev;
+
+ adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+ spin_lock(&mgr->lock);
+ drm_mm_for_each_node(mm_node, &mgr->mm) {
+ node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+ amdgpu_ttm_recover_gart(node->base.bo);
+ }
+ spin_unlock(&mgr->lock);
+
+ amdgpu_gart_invalidate_tlb(adev);
+}
+
+/**
+ * amdgpu_gtt_mgr_intersects - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ */
+static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_compatible - test for compatibility
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified compatibility test.
+ */
+static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_debug - dump VRAM table
+ *
+ * @man: TTM memory type manager
+ * @printer: DRM printer to use
+ *
+ * Dump the table content using printk.
+ */
+static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+
+ spin_lock(&mgr->lock);
+ drm_mm_print(&mgr->mm, printer);
+ spin_unlock(&mgr->lock);
+}
+
+static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
+ .alloc = amdgpu_gtt_mgr_new,
+ .free = amdgpu_gtt_mgr_del,
+ .intersects = amdgpu_gtt_mgr_intersects,
+ .compatible = amdgpu_gtt_mgr_compatible,
+ .debug = amdgpu_gtt_mgr_debug
+};
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ uint64_t start, size;
+
+ man->use_tt = true;
+ man->func = &amdgpu_gtt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
+
+ start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+ drm_mm_init(&mgr->mm, start, size);
+ spin_lock_init(&mgr->lock);
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ spin_lock(&mgr->lock);
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 000000000..b6cf80193
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_hdp_ras *ras;
+
+ if (!adev->hdp.ras)
+ return 0;
+
+ ras = adev->hdp.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register hdp ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "hdp");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__HDP;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if = &ras->ras_block.ras_comm;
+
+ /* hdp ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
new file mode 100644
index 000000000..7b8a6152d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_HDP_H__
+#define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
+
+struct amdgpu_hdp_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_hdp_funcs {
+ void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ void (*invalidate_hdp)(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+ void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
+ void (*init_registers)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_hdp_funcs *funcs;
+ struct amdgpu_hdp_ras *ras;
+};
+
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
new file mode 100644
index 000000000..081267161
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling registers an MMU notifier to inform the driver
+ * about updates on the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables we block the update until
+ * all operations on the pages in question are completed, then those pages are
+ * marked as accessed and also dirty if it wasn't a read only access.
+ *
+ * New command submissions using the userptrs in question are delayed until all
+ * page table invalidation are completed and we once more see a coherent process
+ * address space.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_hmm.h"
+
+#define MAX_WALK_BYTE (2UL << 30)
+
+/**
+ * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
+ */
+static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ long r;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&adev->notifier_lock);
+
+ mmu_interval_set_seq(mni, cur_seq);
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = {
+ .invalidate = amdgpu_hmm_invalidate_gfx,
+};
+
+/**
+ * amdgpu_hmm_invalidate_hsa - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
+ */
+static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
+ .invalidate = amdgpu_hmm_invalidate_hsa,
+};
+
+/**
+ * amdgpu_hmm_register - register a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers a mmu_notifier for the given BO at the specified address.
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_hmm_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_hmm_gfx_ops);
+}
+
+/**
+ * amdgpu_hmm_unregister - unregister a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ *
+ * Remove any registration of mmu notifier updates from the buffer object.
+ */
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
+{
+ if (!bo->notifier.mm)
+ return;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
+}
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner, struct page **pages,
+ struct hmm_range **phmm_range)
+{
+ struct hmm_range *hmm_range;
+ unsigned long end;
+ unsigned long timeout;
+ unsigned long i;
+ unsigned long *pfns;
+ int r = 0;
+
+ hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+ if (unlikely(!hmm_range))
+ return -ENOMEM;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_range;
+ }
+
+ hmm_range->notifier = notifier;
+ hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!readonly)
+ hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+ hmm_range->hmm_pfns = pfns;
+ hmm_range->start = start;
+ end = start + npages * PAGE_SIZE;
+ hmm_range->dev_private_owner = owner;
+
+ do {
+ hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+ pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+ hmm_range->start, hmm_range->end);
+
+ /* Assuming 128MB takes maximum 1 second to fault page address */
+ timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
+ timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
+ timeout = jiffies + msecs_to_jiffies(timeout);
+
+retry:
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+ r = hmm_range_fault(hmm_range);
+ if (unlikely(r)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ if (hmm_range->end == end)
+ break;
+ hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+ hmm_range->start = hmm_range->end;
+ schedule();
+ } while (hmm_range->end < end);
+
+ hmm_range->start = start;
+ hmm_range->hmm_pfns = pfns;
+
+ /*
+ * Due to default_flags, all pages are HMM_PFN_VALID or
+ * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+ * the notifier_lock, and mmu_interval_read_retry() must be done first.
+ */
+ for (i = 0; pages && i < npages; i++)
+ pages[i] = hmm_pfn_to_page(pfns[i]);
+
+ *phmm_range = hmm_range;
+
+ return 0;
+
+out_free_pfns:
+ kvfree(pfns);
+out_free_range:
+ kfree(hmm_range);
+
+ return r;
+}
+
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+ bool r;
+
+ r = mmu_interval_read_retry(hmm_range->notifier,
+ hmm_range->notifier_seq);
+ kvfree(hmm_range->hmm_pfns);
+ kfree(hmm_range);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
new file mode 100644
index 000000000..e2edcd010
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_MN_H__
+#define __AMDGPU_MN_H__
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner, struct page **pages,
+ struct hmm_range **phmm_range);
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
+#if defined(CONFIG_HMM_MIRROR)
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
+#else
+static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
+ return -ENODEV;
+}
+static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
new file mode 100644
index 000000000..82608df43
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <linux/export.h>
+#include <linux/pci.h>
+
+#include <drm/drm_edid.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_atombios.h"
+#include "atom.h"
+#include "atombios_dp.h"
+#include "atombios_i2c.h"
+
+/* bit banging i2c */
+static int amdgpu_i2c_pre_xfer(struct i2c_adapter *i2c_adap)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t temp;
+
+ mutex_lock(&i2c->mutex);
+
+ /* switch the pads to ddc mode */
+ if (rec->hw_capable) {
+ temp = RREG32(rec->mask_clk_reg);
+ temp &= ~(1 << 16);
+ WREG32(rec->mask_clk_reg, temp);
+ }
+
+ /* clear the output pin values */
+ temp = RREG32(rec->a_clk_reg) & ~rec->a_clk_mask;
+ WREG32(rec->a_clk_reg, temp);
+
+ temp = RREG32(rec->a_data_reg) & ~rec->a_data_mask;
+ WREG32(rec->a_data_reg, temp);
+
+ /* set the pins to input */
+ temp = RREG32(rec->en_clk_reg) & ~rec->en_clk_mask;
+ WREG32(rec->en_clk_reg, temp);
+
+ temp = RREG32(rec->en_data_reg) & ~rec->en_data_mask;
+ WREG32(rec->en_data_reg, temp);
+
+ /* mask the gpio pins for software use */
+ temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
+ WREG32(rec->mask_clk_reg, temp);
+ temp = RREG32(rec->mask_clk_reg);
+
+ temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+ WREG32(rec->mask_data_reg, temp);
+ temp = RREG32(rec->mask_data_reg);
+
+ return 0;
+}
+
+static void amdgpu_i2c_post_xfer(struct i2c_adapter *i2c_adap)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t temp;
+
+ /* unmask the gpio pins for software use */
+ temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+ WREG32(rec->mask_clk_reg, temp);
+ temp = RREG32(rec->mask_clk_reg);
+
+ temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
+ WREG32(rec->mask_data_reg, temp);
+ temp = RREG32(rec->mask_data_reg);
+
+ mutex_unlock(&i2c->mutex);
+}
+
+static int amdgpu_i2c_get_clock(void *i2c_priv)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* read the value off the pin */
+ val = RREG32(rec->y_clk_reg);
+ val &= rec->y_clk_mask;
+
+ return (val != 0);
+}
+
+
+static int amdgpu_i2c_get_data(void *i2c_priv)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* read the value off the pin */
+ val = RREG32(rec->y_data_reg);
+ val &= rec->y_data_mask;
+
+ return (val != 0);
+}
+
+static void amdgpu_i2c_set_clock(void *i2c_priv, int clock)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* set pin direction */
+ val = RREG32(rec->en_clk_reg) & ~rec->en_clk_mask;
+ val |= clock ? 0 : rec->en_clk_mask;
+ WREG32(rec->en_clk_reg, val);
+}
+
+static void amdgpu_i2c_set_data(void *i2c_priv, int data)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* set pin direction */
+ val = RREG32(rec->en_data_reg) & ~rec->en_data_mask;
+ val |= data ? 0 : rec->en_data_mask;
+ WREG32(rec->en_data_reg, val);
+}
+
+static const struct i2c_algorithm amdgpu_atombios_i2c_algo = {
+ .master_xfer = amdgpu_atombios_i2c_xfer,
+ .functionality = amdgpu_atombios_i2c_func,
+};
+
+struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name)
+{
+ struct amdgpu_i2c_chan *i2c;
+ int ret;
+
+ /* don't add the mm_i2c bus unless hw_i2c is enabled */
+ if (rec->mm_i2c && (amdgpu_hw_i2c == 0))
+ return NULL;
+
+ i2c = kzalloc(sizeof(struct amdgpu_i2c_chan), GFP_KERNEL);
+ if (i2c == NULL)
+ return NULL;
+
+ i2c->rec = *rec;
+ i2c->adapter.owner = THIS_MODULE;
+ i2c->adapter.class = I2C_CLASS_DDC;
+ i2c->adapter.dev.parent = dev->dev;
+ i2c->dev = dev;
+ i2c_set_adapdata(&i2c->adapter, i2c);
+ mutex_init(&i2c->mutex);
+ if (rec->hw_capable &&
+ amdgpu_hw_i2c) {
+ /* hw i2c using atom */
+ snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+ "AMDGPU i2c hw bus %s", name);
+ i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
+ ret = i2c_add_adapter(&i2c->adapter);
+ if (ret)
+ goto out_free;
+ } else {
+ /* set the amdgpu bit adapter */
+ snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+ "AMDGPU i2c bit bus %s", name);
+ i2c->adapter.algo_data = &i2c->bit;
+ i2c->bit.pre_xfer = amdgpu_i2c_pre_xfer;
+ i2c->bit.post_xfer = amdgpu_i2c_post_xfer;
+ i2c->bit.setsda = amdgpu_i2c_set_data;
+ i2c->bit.setscl = amdgpu_i2c_set_clock;
+ i2c->bit.getsda = amdgpu_i2c_get_data;
+ i2c->bit.getscl = amdgpu_i2c_get_clock;
+ i2c->bit.udelay = 10;
+ i2c->bit.timeout = usecs_to_jiffies(2200); /* from VESA */
+ i2c->bit.data = i2c;
+ ret = i2c_bit_add_bus(&i2c->adapter);
+ if (ret) {
+ DRM_ERROR("Failed to register bit i2c %s\n", name);
+ goto out_free;
+ }
+ }
+
+ return i2c;
+out_free:
+ kfree(i2c);
+ return NULL;
+
+}
+
+void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
+{
+ if (!i2c)
+ return;
+ WARN_ON(i2c->has_aux);
+ i2c_del_adapter(&i2c->adapter);
+ kfree(i2c);
+}
+
+/* Add the default buses */
+void amdgpu_i2c_init(struct amdgpu_device *adev)
+{
+ if (amdgpu_hw_i2c)
+ DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n");
+
+ amdgpu_atombios_i2c_init(adev);
+}
+
+/* remove all the buses */
+void amdgpu_i2c_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
+ if (adev->i2c_bus[i]) {
+ amdgpu_i2c_destroy(adev->i2c_bus[i]);
+ adev->i2c_bus[i] = NULL;
+ }
+ }
+}
+
+/* Add additional buses */
+void amdgpu_i2c_add(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
+ if (!adev->i2c_bus[i]) {
+ adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name);
+ return;
+ }
+ }
+}
+
+/* looks up bus based on id */
+struct amdgpu_i2c_chan *
+amdgpu_i2c_lookup(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *i2c_bus)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
+ if (adev->i2c_bus[i] &&
+ (adev->i2c_bus[i]->rec.i2c_id == i2c_bus->i2c_id)) {
+ return adev->i2c_bus[i];
+ }
+ }
+ return NULL;
+}
+
+static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+ u8 slave_addr,
+ u8 addr,
+ u8 *val)
+{
+ u8 out_buf[2];
+ u8 in_buf[2];
+ struct i2c_msg msgs[] = {
+ {
+ .addr = slave_addr,
+ .flags = 0,
+ .len = 1,
+ .buf = out_buf,
+ },
+ {
+ .addr = slave_addr,
+ .flags = I2C_M_RD,
+ .len = 1,
+ .buf = in_buf,
+ }
+ };
+
+ out_buf[0] = addr;
+ out_buf[1] = 0;
+
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) {
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+ } else {
+ DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
+ addr, *val);
+ }
+}
+
+static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+ u8 slave_addr,
+ u8 addr,
+ u8 val)
+{
+ uint8_t out_buf[2];
+ struct i2c_msg msg = {
+ .addr = slave_addr,
+ .flags = 0,
+ .len = 2,
+ .buf = out_buf,
+ };
+
+ out_buf[0] = addr;
+ out_buf[1] = val;
+
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
+ addr, val);
+}
+
+/* ddc router switching */
+void
+amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector)
+{
+ u8 val = 0;
+
+ if (!amdgpu_connector->router.ddc_valid)
+ return;
+
+ if (!amdgpu_connector->router_bus)
+ return;
+
+ amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, &val);
+ val &= ~amdgpu_connector->router.ddc_mux_control_pin;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, val);
+ amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, &val);
+ val &= ~amdgpu_connector->router.ddc_mux_control_pin;
+ val |= amdgpu_connector->router.ddc_mux_state;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, val);
+}
+
+/* clock/data router switching */
+void
+amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector)
+{
+ u8 val;
+
+ if (!amdgpu_connector->router.cd_valid)
+ return;
+
+ if (!amdgpu_connector->router_bus)
+ return;
+
+ amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, &val);
+ val &= ~amdgpu_connector->router.cd_mux_control_pin;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, val);
+ amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, &val);
+ val &= ~amdgpu_connector->router.cd_mux_control_pin;
+ val |= amdgpu_connector->router.cd_mux_state;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, val);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
new file mode 100644
index 000000000..63c2ff749
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_I2C_H__
+#define __AMDGPU_I2C_H__
+
+struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name);
+void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
+void amdgpu_i2c_init(struct amdgpu_device *adev);
+void amdgpu_i2c_fini(struct amdgpu_device *adev);
+void amdgpu_i2c_add(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name);
+struct amdgpu_i2c_chan *
+amdgpu_i2c_lookup(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *i2c_bus);
+void
+amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *connector);
+void
+amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *connector);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
new file mode 100644
index 000000000..6aa3b1d84
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ * Christian König
+ */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
+#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000)
+
+/*
+ * IB
+ * IBs (Indirect Buffers) and areas of GPU accessible memory where
+ * commands are stored. You can put a pointer to the IB in the
+ * command ring and the hw will fetch the commands from the IB
+ * and execute them. Generally userspace acceleration drivers
+ * produce command buffers which are send to the kernel and
+ * put in IBs for execution by the requested ring.
+ */
+
+/**
+ * amdgpu_ib_get - request an IB (Indirect Buffer)
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
+ * @size: requested IB size
+ * @pool_type: IB pool type (delayed, immediate, direct)
+ * @ib: IB object returned
+ *
+ * Request an IB (all asics). IBs are allocated using the
+ * suballocator.
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned int size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_ib *ib)
+{
+ int r;
+
+ if (size) {
+ r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
+ &ib->sa_bo, size);
+ if (r) {
+ dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
+ return r;
+ }
+
+ ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
+ /* flush the cache before commit the IB */
+ ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
+
+ if (!vm)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ib_free - free an IB (Indirect Buffer)
+ *
+ * @adev: amdgpu_device pointer
+ * @ib: IB object to free
+ * @f: the fence SA bo need wait on for the ib alloation
+ *
+ * Free an IB (all asics).
+ */
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
+ struct dma_fence *f)
+{
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+}
+
+/**
+ * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
+ *
+ * @ring: ring index the IB is associated with
+ * @num_ibs: number of IBs to schedule
+ * @ibs: IB objects to schedule
+ * @job: job to schedule
+ * @f: fence created during this submission
+ *
+ * Schedule an IB on the associated ring (all asics).
+ * Returns 0 on success, error on failure.
+ *
+ * On SI, there are two parallel engines fed from the primary ring,
+ * the CE (Constant Engine) and the DE (Drawing Engine). Since
+ * resource descriptors have moved to memory, the CE allows you to
+ * prime the caches while the DE is updating register state so that
+ * the resource descriptors will be already in cache when the draw is
+ * processed. To accomplish this, the userspace driver submits two
+ * IBs, one for the CE and one for the DE. If there is a CE IB (called
+ * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
+ * to SI there was just a DE IB.
+ */
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib *ib = &ibs[0];
+ struct dma_fence *tmp = NULL;
+ bool need_ctx_switch;
+ unsigned int patch_offset = ~0;
+ struct amdgpu_vm *vm;
+ uint64_t fence_ctx;
+ uint32_t status = 0, alloc_size;
+ unsigned int fence_flags = 0;
+ bool secure, init_shadow;
+ u64 shadow_va, csa_va, gds_va;
+ int vmid = AMDGPU_JOB_GET_VMID(job);
+
+ unsigned int i;
+ int r = 0;
+ bool need_pipe_sync = false;
+
+ if (num_ibs == 0)
+ return -EINVAL;
+
+ /* ring tests don't use a job */
+ if (job) {
+ vm = job->vm;
+ fence_ctx = job->base.s_fence ?
+ job->base.s_fence->scheduled.context : 0;
+ shadow_va = job->shadow_va;
+ csa_va = job->csa_va;
+ gds_va = job->gds_va;
+ init_shadow = job->init_shadow;
+ } else {
+ vm = NULL;
+ fence_ctx = 0;
+ shadow_va = 0;
+ csa_va = 0;
+ gds_va = 0;
+ init_shadow = false;
+ }
+
+ if (!ring->sched.ready && !ring->is_mes_queue) {
+ dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
+ return -EINVAL;
+ }
+
+ if (vm && !job->vmid && !ring->is_mes_queue) {
+ dev_err(adev->dev, "VM IB without ID\n");
+ return -EINVAL;
+ }
+
+ if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
+ (!ring->funcs->secure_submission_supported)) {
+ dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+ return -EINVAL;
+ }
+
+ alloc_size = ring->funcs->emit_frame_size + num_ibs *
+ ring->funcs->emit_ib_size;
+
+ r = amdgpu_ring_alloc(ring, alloc_size);
+ if (r) {
+ dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
+ return r;
+ }
+
+ need_ctx_switch = ring->current_ctx != fence_ctx;
+ if (ring->funcs->emit_pipeline_sync && job &&
+ ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+ (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
+ amdgpu_vm_need_pipeline_sync(ring, job))) {
+ need_pipe_sync = true;
+
+ if (tmp)
+ trace_amdgpu_ib_pipe_sync(job, tmp);
+
+ dma_fence_put(tmp);
+ }
+
+ if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
+ ring->funcs->emit_mem_sync(ring);
+
+ if (ring->funcs->emit_wave_limit &&
+ ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ ring->funcs->emit_wave_limit(ring, true);
+
+ if (ring->funcs->insert_start)
+ ring->funcs->insert_start(ring);
+
+ if (job) {
+ r = amdgpu_vm_flush(ring, job, need_pipe_sync);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+ }
+
+ amdgpu_ring_ib_begin(ring);
+
+ if (ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+ init_shadow, vmid);
+
+ if (ring->funcs->init_cond_exec)
+ patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+ amdgpu_device_flush_hdp(adev, ring);
+
+ if (need_ctx_switch)
+ status |= AMDGPU_HAVE_CTX_SWITCH;
+
+ if (job && ring->funcs->emit_cntxcntl) {
+ status |= job->preamble_status;
+ status |= job->preemption_status;
+ amdgpu_ring_emit_cntxcntl(ring, status);
+ }
+
+ /* Setup initial TMZiness and send it off.
+ */
+ secure = false;
+ if (job && ring->funcs->emit_frame_cntl) {
+ secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+
+ for (i = 0; i < num_ibs; ++i) {
+ ib = &ibs[i];
+
+ if (job && ring->funcs->emit_frame_cntl) {
+ if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
+ secure = !secure;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+ }
+
+ amdgpu_ring_emit_ib(ring, job, ib, status);
+ status &= ~AMDGPU_HAVE_CTX_SWITCH;
+ }
+
+ if (job && ring->funcs->emit_frame_cntl)
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
+
+ amdgpu_device_invalidate_hdp(adev, ring);
+
+ if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+ fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* wrap the last IB with fence */
+ if (job && job->uf_addr) {
+ amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+ fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+ }
+
+ if (ring->funcs->emit_gfx_shadow) {
+ amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+
+ if (ring->funcs->init_cond_exec) {
+ unsigned int ce_offset = ~0;
+
+ ce_offset = amdgpu_ring_init_cond_exec(ring);
+ if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, ce_offset);
+ }
+ }
+
+ r = amdgpu_fence_emit(ring, f, job, fence_flags);
+ if (r) {
+ dev_err(adev->dev, "failed to emit fence (%d)\n", r);
+ if (job && job->vmid)
+ amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+
+ if (ring->funcs->insert_end)
+ ring->funcs->insert_end(ring);
+
+ if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
+ ring->current_ctx = fence_ctx;
+ if (vm && ring->funcs->emit_switch_buffer)
+ amdgpu_ring_emit_switch_buffer(ring);
+
+ if (ring->funcs->emit_wave_limit &&
+ ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ ring->funcs->emit_wave_limit(ring, false);
+
+ amdgpu_ring_ib_end(ring);
+ amdgpu_ring_commit(ring);
+ return 0;
+}
+
+/**
+ * amdgpu_ib_pool_init - Init the IB (Indirect Buffer) pool
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the suballocator to manage a pool of memory
+ * for use as IBs (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ib_pool_init(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ if (adev->ib_pool_ready)
+ return 0;
+
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
+ r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
+ AMDGPU_IB_POOL_SIZE, 256,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto error;
+ }
+ adev->ib_pool_ready = true;
+
+ return 0;
+
+error:
+ while (i--)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ return r;
+}
+
+/**
+ * amdgpu_ib_pool_fini - Free the IB (Indirect Buffer) pool
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the suballocator managing the pool of memory
+ * for use as IBs (all asics).
+ */
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->ib_pool_ready)
+ return;
+
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ adev->ib_pool_ready = false;
+}
+
+/**
+ * amdgpu_ib_ring_tests - test IBs on the rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Test an IB (Indirect Buffer) on each ring.
+ * If the test fails, disable the ring.
+ * Returns 0 on success, error if the primary GFX ring
+ * IB test fails.
+ */
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
+{
+ long tmo_gfx, tmo_mm;
+ int r, ret = 0;
+ unsigned int i;
+
+ tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+ if (amdgpu_sriov_vf(adev)) {
+ /* for MM engines in hypervisor side they are not scheduled together
+ * with CP and SDMA engines, so even in exclusive mode MM engine could
+ * still running on other VF thus the IB TEST TIMEOUT for MM engines
+ * under SR-IOV should be set to a long time. 8 sec should be enough
+ * for the MM comes back to this VF.
+ */
+ tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ }
+
+ if (amdgpu_sriov_runtime(adev)) {
+ /* for CP & SDMA engines since they are scheduled together so
+ * need to make the timeout width enough to cover the time
+ * cost waiting for it coming back under RUNTIME only
+ */
+ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ } else if (adev->gmc.xgmi.hive_id) {
+ tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
+ }
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ long tmo;
+
+ /* KIQ rings don't have an IB test because we never submit IBs
+ * to them and they have no interrupt support.
+ */
+ if (!ring->sched.ready || !ring->funcs->test_ib)
+ continue;
+
+ if (adev->enable_mes &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ continue;
+
+ /* MM engine need more time */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ tmo = tmo_mm;
+ else
+ tmo = tmo_gfx;
+
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (!r) {
+ DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
+ ring->name);
+ continue;
+ }
+
+ ring->sched.ready = false;
+ DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
+ ring->name, r);
+
+ if (ring == &adev->gfx.gfx_ring[0]) {
+ /* oh, oh, that's really bad */
+ adev->accel_working = false;
+ return r;
+
+ } else {
+ ret = r;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+
+ seq_puts(m, "--------------------- DELAYED ---------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
+ m);
+ seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
+ m);
+ seq_puts(m, "--------------------- DIRECT ----------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_sa_info);
+
+#endif
+
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_sa_info", 0444, root, adev,
+ &amdgpu_debugfs_sa_info_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
new file mode 100644
index 000000000..ff1ea9929
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ids.h"
+
+#include <linux/idr.h>
+#include <linux/dma-fence-array.h>
+
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+/*
+ * PASID manager
+ *
+ * PASIDs are global address space identifiers that can be shared
+ * between the GPU, an IOMMU and the driver. VMs on different devices
+ * may use the same PASID if they share the same address
+ * space. Therefore PASIDs are allocated using a global IDA. VMs are
+ * looked up from the PASID per amdgpu_device.
+ */
+static DEFINE_IDA(amdgpu_pasid_ida);
+
+/* Helper to free pasid from a fence callback */
+struct amdgpu_pasid_cb {
+ struct dma_fence_cb cb;
+ u32 pasid;
+};
+
+/**
+ * amdgpu_pasid_alloc - Allocate a PASID
+ * @bits: Maximum width of the PASID in bits, must be at least 1
+ *
+ * Allocates a PASID of the given width while keeping smaller PASIDs
+ * available if possible.
+ *
+ * Returns a positive integer on success. Returns %-EINVAL if bits==0.
+ * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
+ * memory allocation failure.
+ */
+int amdgpu_pasid_alloc(unsigned int bits)
+{
+ int pasid = -EINVAL;
+
+ for (bits = min(bits, 31U); bits > 0; bits--) {
+ pasid = ida_simple_get(&amdgpu_pasid_ida,
+ 1U << (bits - 1), 1U << bits,
+ GFP_KERNEL);
+ if (pasid != -ENOSPC)
+ break;
+ }
+
+ if (pasid >= 0)
+ trace_amdgpu_pasid_allocated(pasid);
+
+ return pasid;
+}
+
+/**
+ * amdgpu_pasid_free - Free a PASID
+ * @pasid: PASID to free
+ */
+void amdgpu_pasid_free(u32 pasid)
+{
+ trace_amdgpu_pasid_freed(pasid);
+ ida_simple_remove(&amdgpu_pasid_ida, pasid);
+}
+
+static void amdgpu_pasid_free_cb(struct dma_fence *fence,
+ struct dma_fence_cb *_cb)
+{
+ struct amdgpu_pasid_cb *cb =
+ container_of(_cb, struct amdgpu_pasid_cb, cb);
+
+ amdgpu_pasid_free(cb->pasid);
+ dma_fence_put(fence);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_pasid_free_delayed - free pasid when fences signal
+ *
+ * @resv: reservation object with the fences to wait for
+ * @pasid: pasid to free
+ *
+ * Free the pasid only after all the fences in resv are signaled.
+ */
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
+ u32 pasid)
+{
+ struct amdgpu_pasid_cb *cb;
+ struct dma_fence *fence;
+ int r;
+
+ r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+ if (r)
+ goto fallback;
+
+ if (!fence) {
+ amdgpu_pasid_free(pasid);
+ return;
+ }
+
+ cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ amdgpu_pasid_free(pasid);
+ } else {
+ cb->pasid = pasid;
+ if (dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_pasid_free_cb))
+ amdgpu_pasid_free_cb(fence, &cb->cb);
+ }
+
+ return;
+
+fallback:
+ /* Not enough memory for the delayed delete, as last resort
+ * block for all the fences to complete.
+ */
+ dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ amdgpu_pasid_free(pasid);
+}
+
+/*
+ * VMID manager
+ *
+ * VMIDs are a per VMHUB identifier for page tables handling.
+ */
+
+/**
+ * amdgpu_vmid_had_gpu_reset - check if reset occured since last use
+ *
+ * @adev: amdgpu_device pointer
+ * @id: VMID structure
+ *
+ * Check if GPU reset occured since last use of the VMID.
+ */
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+ struct amdgpu_vmid *id)
+{
+ return id->current_gpu_reset_count !=
+ atomic_read(&adev->gpu_reset_counter);
+}
+
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->pd_gpu_addr == job->vm_pd_addr &&
+ !amdgpu_vmid_gds_switch_needed(id, job);
+}
+
+/**
+ * amdgpu_vmid_grab_idle - grab idle VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @idle: resulting idle VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Try to find an idle VMID, if none is idle add a fence to wait to the sync
+ * object. Returns -ENOMEM when we are out of memory.
+ */
+static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_vmid **idle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct dma_fence **fences;
+ unsigned i;
+
+ if (!dma_fence_is_signaled(ring->vmid_wait)) {
+ *fence = dma_fence_get(ring->vmid_wait);
+ return 0;
+ }
+
+ fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
+ if (!fences)
+ return -ENOMEM;
+
+ /* Check if we have an idle VMID */
+ i = 0;
+ list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+ /* Don't use per engine and per process VMID at the same time */
+ struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
+ NULL : ring;
+
+ fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
+ if (!fences[i])
+ break;
+ ++i;
+ }
+
+ /* If we can't find a idle VMID to use, wait till one becomes available */
+ if (&(*idle)->list == &id_mgr->ids_lru) {
+ u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+ unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+ struct dma_fence_array *array;
+ unsigned j;
+
+ *idle = NULL;
+ for (j = 0; j < i; ++j)
+ dma_fence_get(fences[j]);
+
+ array = dma_fence_array_create(i, fences, fence_context,
+ seqno, true);
+ if (!array) {
+ for (j = 0; j < i; ++j)
+ dma_fence_put(fences[j]);
+ kfree(fences);
+ return -ENOMEM;
+ }
+
+ *fence = dma_fence_get(&array->base);
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = &array->base;
+ return 0;
+ }
+ kfree(fences);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_reserved - try to assign reserved VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Try to assign a reserved VMID.
+ */
+static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ bool needs_flush = vm->use_cpu_for_update;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
+
+ *id = id_mgr->reserved;
+ if ((*id)->owner != vm->immediate.fence_context ||
+ !amdgpu_vmid_compatible(*id, job) ||
+ (*id)->flushed_updates < updates ||
+ !(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush))) {
+ struct dma_fence *tmp;
+
+ /* Don't use per engine and per process VMID at the same time */
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
+ /* to prevent one context starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
+ *id = NULL;
+ *fence = dma_fence_get(tmp);
+ return 0;
+ }
+ needs_flush = true;
+ }
+
+ /* Good we can use this VMID. Remember this submission as
+ * user of the VMID.
+ */
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
+ if (r)
+ return r;
+
+ job->vm_needs_flush = needs_flush;
+ job->spm_update_needed = true;
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_used - try to reuse a VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Try to reuse a VMID for this submission.
+ */
+static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
+
+ job->vm_needs_flush = vm->use_cpu_for_update;
+
+ /* Check if we can use a VMID already assigned to this VM */
+ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
+ bool needs_flush = vm->use_cpu_for_update;
+
+ /* Check all the prerequisites to using this VMID */
+ if ((*id)->owner != vm->immediate.fence_context)
+ continue;
+
+ if (!amdgpu_vmid_compatible(*id, job))
+ continue;
+
+ if (!(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->flushed_updates < updates)
+ needs_flush = true;
+
+ if (needs_flush && !adev->vm_manager.concurrent_flush)
+ continue;
+
+ /* Good, we can use this VMID. Remember this submission as
+ * user of the VMID.
+ */
+ r = amdgpu_sync_fence(&(*id)->active,
+ &job->base.s_fence->finished);
+ if (r)
+ return r;
+
+ job->vm_needs_flush |= needs_flush;
+ return 0;
+ }
+
+ *id = NULL;
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *idle = NULL;
+ struct amdgpu_vmid *id = NULL;
+ int r = 0;
+
+ mutex_lock(&id_mgr->lock);
+ r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence);
+ if (r || !idle)
+ goto error;
+
+ if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
+ r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
+ if (r || !id)
+ goto error;
+ } else {
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence);
+ if (r)
+ goto error;
+
+ if (!id) {
+ /* Still no ID to use? Then use the idle one found earlier */
+ id = idle;
+
+ /* Remember this submission as user of the VMID */
+ r = amdgpu_sync_fence(&id->active,
+ &job->base.s_fence->finished);
+ if (r)
+ goto error;
+
+ job->vm_needs_flush = true;
+ }
+
+ list_move_tail(&id->list, &id_mgr->ids_lru);
+ }
+
+ job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
+ if (job->vm_needs_flush) {
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
+ dma_fence_put(id->last_flush);
+ id->last_flush = NULL;
+ }
+ job->vmid = id - id_mgr->ids;
+ job->pasid = vm->pasid;
+
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->immediate.fence_context;
+
+ trace_amdgpu_vm_grab_id(vm, ring, job);
+
+error:
+ mutex_unlock(&id_mgr->lock);
+ return r;
+}
+
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+ unsigned vmhub)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ mutex_lock(&id_mgr->lock);
+
+ ++id_mgr->reserved_use_count;
+ if (!id_mgr->reserved) {
+ struct amdgpu_vmid *id;
+
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
+ list);
+ /* Remove from normal round robin handling */
+ list_del_init(&id->list);
+ id_mgr->reserved = id;
+ }
+
+ mutex_unlock(&id_mgr->lock);
+ return 0;
+}
+
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+ unsigned vmhub)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ mutex_lock(&id_mgr->lock);
+ if (!--id_mgr->reserved_use_count) {
+ /* give the reserved ID back to normal round robin */
+ list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
+ id_mgr->reserved = NULL;
+ }
+
+ mutex_unlock(&id_mgr->lock);
+}
+
+/**
+ * amdgpu_vmid_reset - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ * @vmhub: vmhub type
+ * @vmid: vmid number to use
+ *
+ * Reset saved GDW, GWS and OA to force switch on next flush.
+ */
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+ unsigned vmid)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id = &id_mgr->ids[vmid];
+
+ mutex_lock(&id_mgr->lock);
+ id->owner = 0;
+ id->gds_base = 0;
+ id->gds_size = 0;
+ id->gws_base = 0;
+ id->gws_size = 0;
+ id->oa_base = 0;
+ id->oa_size = 0;
+ mutex_unlock(&id_mgr->lock);
+}
+
+/**
+ * amdgpu_vmid_reset_all - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ for (j = 1; j < id_mgr->num_ids; ++j)
+ amdgpu_vmid_reset(adev, i, j);
+ }
+}
+
+/**
+ * amdgpu_vmid_mgr_init - init the VMID manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ mutex_init(&id_mgr->lock);
+ INIT_LIST_HEAD(&id_mgr->ids_lru);
+ id_mgr->reserved_use_count = 0;
+
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+
+ /* skip over VMID 0, since it is the system VM */
+ for (j = 1; j < id_mgr->num_ids; ++j) {
+ amdgpu_vmid_reset(adev, i, j);
+ amdgpu_sync_create(&id_mgr->ids[j].active);
+ list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
+ }
+ }
+ /* alloc a default reserved vmid to enforce isolation */
+ if (enforce_isolation)
+ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+
+}
+
+/**
+ * amdgpu_vmid_mgr_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ mutex_destroy(&id_mgr->lock);
+ for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
+ struct amdgpu_vmid *id = &id_mgr->ids[j];
+
+ amdgpu_sync_free(&id->active);
+ dma_fence_put(id->last_flush);
+ dma_fence_put(id->pasid_mapping);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
new file mode 100644
index 000000000..fa8c42c83
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_IDS_H__
+#define __AMDGPU_IDS_H__
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/dma-fence.h>
+
+#include "amdgpu_sync.h"
+
+/* maximum number of VMIDs */
+#define AMDGPU_NUM_VMID 16
+
+struct amdgpu_device;
+struct amdgpu_vm;
+struct amdgpu_ring;
+struct amdgpu_sync;
+struct amdgpu_job;
+
+struct amdgpu_vmid {
+ struct list_head list;
+ struct amdgpu_sync active;
+ struct dma_fence *last_flush;
+ uint64_t owner;
+
+ uint64_t pd_gpu_addr;
+ /* last flushed PD/PT update */
+ uint64_t flushed_updates;
+
+ uint32_t current_gpu_reset_count;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_base;
+ uint32_t oa_size;
+
+ unsigned pasid;
+ struct dma_fence *pasid_mapping;
+};
+
+struct amdgpu_vmid_mgr {
+ struct mutex lock;
+ unsigned num_ids;
+ struct list_head ids_lru;
+ struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
+ struct amdgpu_vmid *reserved;
+ unsigned int reserved_use_count;
+};
+
+int amdgpu_pasid_alloc(unsigned int bits);
+void amdgpu_pasid_free(u32 pasid);
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
+ u32 pasid);
+
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+ struct amdgpu_vmid *id);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+ unsigned vmhub);
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+ unsigned vmhub);
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job, struct dma_fence **fence);
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+ unsigned vmid);
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
+
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev);
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
new file mode 100644
index 000000000..f3b0aaf3e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+/**
+ * amdgpu_ih_ring_init - initialize the IH state
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
+ *
+ * Initializes the IH state and allocates a buffer
+ * for the IH ring buffer.
+ * Returns 0 for success, errors for failure.
+ */
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr)
+{
+ u32 rb_bufsz;
+ int r;
+
+ /* Align ring size */
+ rb_bufsz = order_base_2(ring_size / 4);
+ ring_size = (1 << rb_bufsz) * 4;
+ ih->ring_size = ring_size;
+ ih->ptr_mask = ih->ring_size - 1;
+ ih->rptr = 0;
+ ih->use_bus_addr = use_bus_addr;
+
+ if (use_bus_addr) {
+ dma_addr_t dma_addr;
+
+ if (ih->ring)
+ return 0;
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+ &dma_addr, GFP_KERNEL);
+ if (ih->ring == NULL)
+ return -ENOMEM;
+
+ ih->gpu_addr = dma_addr;
+ ih->wptr_addr = dma_addr + ih->ring_size;
+ ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
+ ih->rptr_addr = dma_addr + ih->ring_size + 4;
+ ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
+ } else {
+ unsigned wptr_offs, rptr_offs;
+
+ r = amdgpu_device_wb_get(adev, &wptr_offs);
+ if (r)
+ return r;
+
+ r = amdgpu_device_wb_get(adev, &rptr_offs);
+ if (r) {
+ amdgpu_device_wb_free(adev, wptr_offs);
+ return r;
+ }
+
+ r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
+ if (r) {
+ amdgpu_device_wb_free(adev, rptr_offs);
+ amdgpu_device_wb_free(adev, wptr_offs);
+ return r;
+ }
+
+ ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
+ ih->wptr_cpu = &adev->wb.wb[wptr_offs];
+ ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
+ ih->rptr_cpu = &adev->wb.wb[rptr_offs];
+ }
+
+ init_waitqueue_head(&ih->wait_process);
+ return 0;
+}
+
+/**
+ * amdgpu_ih_ring_fini - tear down the IH state
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
+ *
+ * Tears down the IH state and frees buffer
+ * used for the IH ring buffer.
+ */
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+
+ if (!ih->ring)
+ return;
+
+ if (ih->use_bus_addr) {
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ dma_free_coherent(adev->dev, ih->ring_size + 8,
+ (void *)ih->ring, ih->gpu_addr);
+ ih->ring = NULL;
+ } else {
+ amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
+ amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
+ amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
+ }
+}
+
+/**
+ * amdgpu_ih_ring_write - write IV to the ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to write to
+ * @iv: the iv to write
+ * @num_dw: size of the iv in dw
+ *
+ * Writes an IV to the ring buffer using the CPU and increment the wptr.
+ * Used for testing and delegating IVs to a software ring.
+ */
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
+{
+ uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
+ unsigned int i;
+
+ for (i = 0; i < num_dw; ++i)
+ ih->ring[wptr++] = cpu_to_le32(iv[i]);
+
+ wptr <<= 2;
+ wptr &= ih->ptr_mask;
+
+ /* Only commit the new wptr if we don't overflow */
+ if (wptr != READ_ONCE(ih->rptr)) {
+ wmb();
+ WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+ } else if (adev->irq.retry_cam_enabled) {
+ dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+ wptr, ih->rptr);
+ }
+}
+
+/**
+ * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Used to ensure ring has processed IVs up to the checkpoint write pointer.
+ */
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t checkpoint_wptr;
+ uint64_t checkpoint_ts;
+ long timeout = HZ;
+
+ if (!ih->enabled || adev->shutdown)
+ return -ENODEV;
+
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+
+ return wait_event_interruptible_timeout(ih->wait_process,
+ amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
+ ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
+}
+
+/**
+ * amdgpu_ih_process - interrupt handler
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Interrupt hander (VI), walk the IH ring.
+ * Returns irq process return code.
+ */
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ unsigned int count;
+ u32 wptr;
+
+ if (!ih->enabled || adev->shutdown)
+ return IRQ_NONE;
+
+ wptr = amdgpu_ih_get_wptr(adev, ih);
+
+restart_ih:
+ count = AMDGPU_IH_MAX_NUM_IVS;
+ DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+
+ /* Order reading of wptr vs. reading of IH ring data */
+ rmb();
+
+ while (ih->rptr != wptr && --count) {
+ amdgpu_irq_dispatch(adev, ih);
+ ih->rptr &= ih->ptr_mask;
+ }
+
+ amdgpu_ih_set_rptr(adev, ih);
+ wake_up_all(&ih->wait_process);
+
+ /* make sure wptr hasn't changed while processing */
+ wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (wptr != ih->rptr)
+ goto restart_ih;
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * amdgpu_ih_decode_iv_helper - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ * @entry: IV entry
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position for Vega10
+ * and later GPUs.
+ */
+void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[8];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+ dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
+ dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
+ dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
+ dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
+
+ entry->client_id = dw[0] & 0xff;
+ entry->src_id = (dw[0] >> 8) & 0xff;
+ entry->ring_id = (dw[0] >> 16) & 0xff;
+ entry->vmid = (dw[0] >> 24) & 0xf;
+ entry->vmid_src = (dw[0] >> 31);
+ entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
+ entry->timestamp_src = dw[2] >> 31;
+ entry->pasid = dw[3] & 0xffff;
+ entry->node_id = (dw[3] >> 16) & 0xff;
+ entry->src_data[0] = dw[4];
+ entry->src_data[1] = dw[5];
+ entry->src_data[2] = dw[6];
+ entry->src_data[3] = dw[7];
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 32;
+}
+
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset)
+{
+ uint32_t iv_size = 32;
+ uint32_t ring_index;
+ uint32_t dw1, dw2;
+
+ rptr += iv_size * offset;
+ ring_index = (rptr & ih->ptr_mask) >> 2;
+
+ dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
+ return dw1 | ((u64)(dw2 & 0xffff) << 32);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
new file mode 100644
index 000000000..508f02eb0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IH_H__
+#define __AMDGPU_IH_H__
+
+/* Maximum number of IVs processed at once */
+#define AMDGPU_IH_MAX_NUM_IVS 32
+
+#define IH_RING_SIZE (256 * 1024)
+#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */
+
+struct amdgpu_device;
+struct amdgpu_iv_entry;
+
+struct amdgpu_ih_regs {
+ uint32_t ih_rb_base;
+ uint32_t ih_rb_base_hi;
+ uint32_t ih_rb_cntl;
+ uint32_t ih_rb_wptr;
+ uint32_t ih_rb_rptr;
+ uint32_t ih_doorbell_rptr;
+ uint32_t ih_rb_wptr_addr_lo;
+ uint32_t ih_rb_wptr_addr_hi;
+ uint32_t psp_reg_id;
+};
+
+/*
+ * R6xx+ IH ring
+ */
+struct amdgpu_ih_ring {
+ unsigned ring_size;
+ uint32_t ptr_mask;
+ u32 doorbell_index;
+ bool use_doorbell;
+ bool use_bus_addr;
+
+ struct amdgpu_bo *ring_obj;
+ volatile uint32_t *ring;
+ uint64_t gpu_addr;
+
+ uint64_t wptr_addr;
+ volatile uint32_t *wptr_cpu;
+
+ uint64_t rptr_addr;
+ volatile uint32_t *rptr_cpu;
+
+ bool enabled;
+ unsigned rptr;
+ struct amdgpu_ih_regs ih_regs;
+
+ /* For waiting on IH processing at checkpoint. */
+ wait_queue_head_t wait_process;
+ uint64_t processed_timestamp;
+};
+
+/* return true if time stamp t2 is after t1 with 48bit wrap around */
+#define amdgpu_ih_ts_after(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+
+/* provided by the ih block */
+struct amdgpu_ih_funcs {
+ /* ring read/write ptr handling, called from interrupt context */
+ u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+ void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry);
+ uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+ void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+};
+
+#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
+#define amdgpu_ih_decode_iv(adev, iv) \
+ (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
+#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
+ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
+ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
+#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
+
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr);
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry);
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
new file mode 100644
index 000000000..484e93681
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IMU_H__
+#define __AMDGPU_IMU_H__
+
+enum imu_work_mode {
+ DEBUG_MODE,
+ MISSION_MODE
+};
+
+struct amdgpu_imu_funcs {
+ int (*init_microcode)(struct amdgpu_device *adev);
+ int (*load_microcode)(struct amdgpu_device *adev);
+ void (*setup_imu)(struct amdgpu_device *adev);
+ int (*start_imu)(struct amdgpu_device *adev);
+ void (*program_rlc_ram)(struct amdgpu_device *adev);
+ int (*wait_for_reset_status)(struct amdgpu_device *adev);
+};
+
+struct imu_rlc_ram_golden {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+ u32 data;
+ u32 addr_mask;
+};
+
+#define IMU_RLC_RAM_GOLDEN_VALUE(ip, inst, reg, data, addr_mask) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, data, addr_mask }
+
+struct amdgpu_imu {
+ const struct amdgpu_imu_funcs *funcs;
+ enum imu_work_mode mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
new file mode 100644
index 000000000..a1cbd7c3d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -0,0 +1,45 @@
+/*
+ * \file amdgpu_ioc32.c
+ *
+ * 32-bit ioctl compatibility routines for the AMDGPU DRM.
+ *
+ * \author Paul Mackerras <paulus@samba.org>
+ *
+ * Copyright (C) Paul Mackerras 2005
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <linux/compat.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_ioctl.h>
+
+#include "amdgpu_drv.h"
+
+long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ unsigned int nr = DRM_IOCTL_NR(cmd);
+
+ if (nr < DRM_COMMAND_BASE)
+ return drm_compat_ioctl(filp, cmd, arg);
+
+ return amdgpu_drm_ioctl(filp, cmd, arg);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
new file mode 100644
index 000000000..fa6d0adce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -0,0 +1,756 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+/**
+ * DOC: Interrupt Handling
+ *
+ * Interrupts generated within GPU hardware raise interrupt requests that are
+ * passed to amdgpu IRQ handler which is responsible for detecting source and
+ * type of the interrupt and dispatching matching handlers. If handling an
+ * interrupt requires calling kernel functions that may sleep processing is
+ * dispatched to work handlers.
+ *
+ * If MSI functionality is not disabled by module parameter then MSI
+ * support will be enabled.
+ *
+ * For GPU interrupt sources that may be driven by another driver, IRQ domain
+ * support is used (with mapping between virtual and hardware IRQs).
+ */
+
+#include <linux/irq.h>
+#include <linux/pci.h>
+
+#include <drm/drm_vblank.h>
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "atom.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
+
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_DRM_AMD_DC
+#include "amdgpu_dm_irq.h"
+#endif
+
+#define AMDGPU_WAIT_IDLE_TIMEOUT 200
+
+const char *soc15_ih_clientid_name[] = {
+ "IH",
+ "SDMA2 or ACP",
+ "ATHUB",
+ "BIF",
+ "SDMA3 or DCE",
+ "SDMA4 or ISP",
+ "VMC1 or PCIE0",
+ "RLC",
+ "SDMA0",
+ "SDMA1",
+ "SE0SH",
+ "SE1SH",
+ "SE2SH",
+ "SE3SH",
+ "VCN1 or UVD1",
+ "THM",
+ "VCN or UVD",
+ "SDMA5 or VCE0",
+ "VMC",
+ "SDMA6 or XDMA",
+ "GRBM_CP",
+ "ATS",
+ "ROM_SMUIO",
+ "DF",
+ "SDMA7 or VCE1",
+ "PWR",
+ "reserved",
+ "UTCL2",
+ "EA",
+ "UTCL2LOG",
+ "MP0",
+ "MP1"
+};
+
+const int node_id_to_phys_map[NODEID_MAX] = {
+ [AID0_NODEID] = 0,
+ [XCD0_NODEID] = 0,
+ [XCD1_NODEID] = 1,
+ [AID1_NODEID] = 1,
+ [XCD2_NODEID] = 2,
+ [XCD3_NODEID] = 3,
+ [AID2_NODEID] = 2,
+ [XCD4_NODEID] = 4,
+ [XCD5_NODEID] = 5,
+ [AID3_NODEID] = 3,
+ [XCD6_NODEID] = 6,
+ [XCD7_NODEID] = 7,
+};
+
+/**
+ * amdgpu_irq_disable_all - disable *all* interrupts
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Disable all types of interrupts from all sources.
+ */
+void amdgpu_irq_disable_all(struct amdgpu_device *adev)
+{
+ unsigned long irqflags;
+ unsigned int i, j, k;
+ int r;
+
+ spin_lock_irqsave(&adev->irq.lock, irqflags);
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src || !src->funcs->set || !src->num_types)
+ continue;
+
+ for (k = 0; k < src->num_types; ++k) {
+ r = src->funcs->set(adev, src, k,
+ AMDGPU_IRQ_STATE_DISABLE);
+ if (r)
+ DRM_ERROR("error disabling interrupt (%d)\n",
+ r);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&adev->irq.lock, irqflags);
+}
+
+/**
+ * amdgpu_irq_handler - IRQ handler
+ *
+ * @irq: IRQ number (unused)
+ * @arg: pointer to DRM device
+ *
+ * IRQ handler for amdgpu driver (all ASICs).
+ *
+ * Returns:
+ * result of handling the IRQ, as defined by &irqreturn_t
+ */
+static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
+{
+ struct drm_device *dev = (struct drm_device *) arg;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ irqreturn_t ret;
+
+ ret = amdgpu_ih_process(adev, &adev->irq.ih);
+ if (ret == IRQ_HANDLED)
+ pm_runtime_mark_last_busy(dev->dev);
+
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
+
+ return ret;
+}
+
+/**
+ * amdgpu_irq_handle_ih1 - kick of processing for IH1
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick of processing IH ring 1.
+ */
+static void amdgpu_irq_handle_ih1(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih1_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih1);
+}
+
+/**
+ * amdgpu_irq_handle_ih2 - kick of processing for IH2
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick of processing IH ring 2.
+ */
+static void amdgpu_irq_handle_ih2(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih2_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih2);
+}
+
+/**
+ * amdgpu_irq_handle_ih_soft - kick of processing for ih_soft
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick of processing IH soft ring.
+ */
+static void amdgpu_irq_handle_ih_soft(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih_soft_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih_soft);
+}
+
+/**
+ * amdgpu_msi_ok - check whether MSI functionality is enabled
+ *
+ * @adev: amdgpu device pointer (unused)
+ *
+ * Checks whether MSI functionality has been disabled via module parameter
+ * (all ASICs).
+ *
+ * Returns:
+ * *true* if MSIs are allowed to be enabled or *false* otherwise
+ */
+static bool amdgpu_msi_ok(struct amdgpu_device *adev)
+{
+ if (amdgpu_msi == 1)
+ return true;
+ else if (amdgpu_msi == 0)
+ return false;
+
+ return true;
+}
+
+static void amdgpu_restore_msix(struct amdgpu_device *adev)
+{
+ u16 ctrl;
+
+ pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+ if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
+ return;
+
+ /* VF FLR */
+ ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+ ctrl |= PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+}
+
+/**
+ * amdgpu_irq_init - initialize interrupt handling
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Sets up work functions for hotplug and reset interrupts, enables MSI
+ * functionality, initializes vblank, hotplug and reset interrupt handling.
+ *
+ * Returns:
+ * 0 on success or error code on failure
+ */
+int amdgpu_irq_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ unsigned int irq;
+
+ spin_lock_init(&adev->irq.lock);
+
+ /* Enable MSI if not disabled by module parameter */
+ adev->irq.msi_enabled = false;
+
+ if (amdgpu_msi_ok(adev)) {
+ int nvec = pci_msix_vec_count(adev->pdev);
+ unsigned int flags;
+
+ if (nvec <= 0)
+ flags = PCI_IRQ_MSI;
+ else
+ flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
+
+ /* we only need one vector */
+ nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (nvec > 0) {
+ adev->irq.msi_enabled = true;
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
+ }
+ }
+
+ INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
+ INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
+ INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft);
+
+ /* Use vector 0 for MSI-X. */
+ r = pci_irq_vector(adev->pdev, 0);
+ if (r < 0)
+ return r;
+ irq = r;
+
+ /* PCI devices require shared interrupts. */
+ r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
+ adev_to_drm(adev));
+ if (r)
+ return r;
+ adev->irq.installed = true;
+ adev->irq.irq = irq;
+ adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
+
+ DRM_DEBUG("amdgpu: irq initialized.\n");
+ return 0;
+}
+
+
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
+{
+ if (adev->irq.installed) {
+ free_irq(adev->irq.irq, adev_to_drm(adev));
+ adev->irq.installed = false;
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+ }
+
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+}
+
+/**
+ * amdgpu_irq_fini_sw - shut down interrupt handling
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Tears down work functions for hotplug and reset interrupts, disables MSI
+ * functionality, shuts down vblank, hotplug and reset interrupt handling,
+ * turns off interrupts from all sources (all ASICs).
+ */
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src)
+ continue;
+
+ kfree(src->enabled_types);
+ src->enabled_types = NULL;
+ }
+ kfree(adev->irq.client[i].sources);
+ adev->irq.client[i].sources = NULL;
+ }
+}
+
+/**
+ * amdgpu_irq_add_id - register IRQ source
+ *
+ * @adev: amdgpu device pointer
+ * @client_id: client id
+ * @src_id: source id
+ * @source: IRQ source pointer
+ *
+ * Registers IRQ source on a client.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned int client_id, unsigned int src_id,
+ struct amdgpu_irq_src *source)
+{
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
+ return -EINVAL;
+
+ if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
+ return -EINVAL;
+
+ if (!source->funcs)
+ return -EINVAL;
+
+ if (!adev->irq.client[client_id].sources) {
+ adev->irq.client[client_id].sources =
+ kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
+ sizeof(struct amdgpu_irq_src *),
+ GFP_KERNEL);
+ if (!adev->irq.client[client_id].sources)
+ return -ENOMEM;
+ }
+
+ if (adev->irq.client[client_id].sources[src_id] != NULL)
+ return -EINVAL;
+
+ if (source->num_types && !source->enabled_types) {
+ atomic_t *types;
+
+ types = kcalloc(source->num_types, sizeof(atomic_t),
+ GFP_KERNEL);
+ if (!types)
+ return -ENOMEM;
+
+ source->enabled_types = types;
+ }
+
+ adev->irq.client[client_id].sources[src_id] = source;
+ return 0;
+}
+
+/**
+ * amdgpu_irq_dispatch - dispatch IRQ to IP blocks
+ *
+ * @adev: amdgpu device pointer
+ * @ih: interrupt ring instance
+ *
+ * Dispatches IRQ to IP blocks.
+ */
+void amdgpu_irq_dispatch(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 ring_index = ih->rptr >> 2;
+ struct amdgpu_iv_entry entry;
+ unsigned int client_id, src_id;
+ struct amdgpu_irq_src *src;
+ bool handled = false;
+ int r;
+
+ entry.ih = ih;
+ entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+ amdgpu_ih_decode_iv(adev, &entry);
+
+ trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
+
+ client_id = entry.client_id;
+ src_id = entry.src_id;
+
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
+ DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
+
+ } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
+ DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
+
+ } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) &&
+ adev->irq.virq[src_id]) {
+ generic_handle_domain_irq(adev->irq.domain, src_id);
+
+ } else if (!adev->irq.client[client_id].sources) {
+ DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
+
+ } else if ((src = adev->irq.client[client_id].sources[src_id])) {
+ r = src->funcs->process(adev, src, &entry);
+ if (r < 0)
+ DRM_ERROR("error processing interrupt (%d)\n", r);
+ else if (r)
+ handled = true;
+
+ } else {
+ DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n",
+ src_id, client_id);
+ }
+
+ /* Send it to amdkfd as well if it isn't already handled */
+ if (!handled)
+ amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
+
+ if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp))
+ ih->processed_timestamp = entry.timestamp;
+}
+
+/**
+ * amdgpu_irq_delegate - delegate IV to soft IH ring
+ *
+ * @adev: amdgpu device pointer
+ * @entry: IV entry
+ * @num_dw: size of IV
+ *
+ * Delegate the IV to the soft IH ring and schedule processing of it. Used
+ * if the hardware delegation to IH1 or IH2 doesn't work for some reason.
+ */
+void amdgpu_irq_delegate(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ unsigned int num_dw)
+{
+ amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
+ schedule_work(&adev->irq.ih_soft_work);
+}
+
+/**
+ * amdgpu_irq_update - update hardware interrupt state
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Updates interrupt state for the specific source (all ASICs).
+ */
+int amdgpu_irq_update(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type)
+{
+ unsigned long irqflags;
+ enum amdgpu_interrupt_state state;
+ int r;
+
+ spin_lock_irqsave(&adev->irq.lock, irqflags);
+
+ /* We need to determine after taking the lock, otherwise
+ * we might disable just enabled interrupts again
+ */
+ if (amdgpu_irq_enabled(adev, src, type))
+ state = AMDGPU_IRQ_STATE_ENABLE;
+ else
+ state = AMDGPU_IRQ_STATE_DISABLE;
+
+ r = src->funcs->set(adev, src, type, state);
+ spin_unlock_irqrestore(&adev->irq.lock, irqflags);
+ return r;
+}
+
+/**
+ * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Updates state of all types of interrupts on all sources on resume after
+ * reset.
+ */
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ amdgpu_restore_msix(adev);
+
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src || !src->funcs || !src->funcs->set)
+ continue;
+ for (k = 0; k < src->num_types; k++)
+ amdgpu_irq_update(adev, src, k);
+ }
+ }
+}
+
+/**
+ * amdgpu_irq_get - enable interrupt
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Enables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ if (!adev->irq.installed)
+ return -ENOENT;
+
+ if (type >= src->num_types)
+ return -EINVAL;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return -EINVAL;
+
+ if (atomic_inc_return(&src->enabled_types[type]) == 1)
+ return amdgpu_irq_update(adev, src, type);
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_put - disable interrupt
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Enables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ if (!adev->irq.installed)
+ return -ENOENT;
+
+ if (type >= src->num_types)
+ return -EINVAL;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return -EINVAL;
+
+ if (WARN_ON(!amdgpu_irq_enabled(adev, src, type)))
+ return -EINVAL;
+
+ if (atomic_dec_and_test(&src->enabled_types[type]))
+ return amdgpu_irq_update(adev, src, type);
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_enabled - check whether interrupt is enabled or not
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Checks whether the given type of interrupt is enabled on the given source.
+ *
+ * Returns:
+ * *true* if interrupt is enabled, *false* if interrupt is disabled or on
+ * invalid parameters
+ */
+bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ if (!adev->irq.installed)
+ return false;
+
+ if (type >= src->num_types)
+ return false;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return false;
+
+ return !!atomic_read(&src->enabled_types[type]);
+}
+
+/* XXX: Generic IRQ handling */
+static void amdgpu_irq_mask(struct irq_data *irqd)
+{
+ /* XXX */
+}
+
+static void amdgpu_irq_unmask(struct irq_data *irqd)
+{
+ /* XXX */
+}
+
+/* amdgpu hardware interrupt chip descriptor */
+static struct irq_chip amdgpu_irq_chip = {
+ .name = "amdgpu-ih",
+ .irq_mask = amdgpu_irq_mask,
+ .irq_unmask = amdgpu_irq_unmask,
+};
+
+/**
+ * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers
+ *
+ * @d: amdgpu IRQ domain pointer (unused)
+ * @irq: virtual IRQ number
+ * @hwirq: hardware irq number
+ *
+ * Current implementation assigns simple interrupt handler to the given virtual
+ * IRQ.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+static int amdgpu_irqdomain_map(struct irq_domain *d,
+ unsigned int irq, irq_hw_number_t hwirq)
+{
+ if (hwirq >= AMDGPU_MAX_IRQ_SRC_ID)
+ return -EPERM;
+
+ irq_set_chip_and_handler(irq,
+ &amdgpu_irq_chip, handle_simple_irq);
+ return 0;
+}
+
+/* Implementation of methods for amdgpu IRQ domain */
+static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
+ .map = amdgpu_irqdomain_map,
+};
+
+/**
+ * amdgpu_irq_add_domain - create a linear IRQ domain
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Creates an IRQ domain for GPU interrupt sources
+ * that may be driven by another driver (e.g., ACP).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_add_domain(struct amdgpu_device *adev)
+{
+ adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
+ if (!adev->irq.domain) {
+ DRM_ERROR("GPU irq add domain failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_remove_domain - remove the IRQ domain
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Removes the IRQ domain for GPU interrupt sources
+ * that may be driven by another driver (e.g., ACP).
+ */
+void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
+{
+ if (adev->irq.domain) {
+ irq_domain_remove(adev->irq.domain);
+ adev->irq.domain = NULL;
+ }
+}
+
+/**
+ * amdgpu_irq_create_mapping - create mapping between domain Linux IRQs
+ *
+ * @adev: amdgpu device pointer
+ * @src_id: IH source id
+ *
+ * Creates mapping between a domain IRQ (GPU IH src id) and a Linux IRQ
+ * Use this for components that generate a GPU interrupt, but are driven
+ * by a different driver (e.g., ACP).
+ *
+ * Returns:
+ * Linux IRQ
+ */
+unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
+{
+ adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
+
+ return adev->irq.virq[src_id];
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
new file mode 100644
index 000000000..04c0b4fa1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IRQ_H__
+#define __AMDGPU_IRQ_H__
+
+#include <linux/irqdomain.h>
+#include "soc15_ih_clientid.h"
+#include "amdgpu_ih.h"
+
+#define AMDGPU_MAX_IRQ_SRC_ID 0x100
+#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
+
+#define AMDGPU_IRQ_CLIENTID_LEGACY 0
+#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
+
+#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4
+
+struct amdgpu_device;
+
+enum amdgpu_interrupt_state {
+ AMDGPU_IRQ_STATE_DISABLE,
+ AMDGPU_IRQ_STATE_ENABLE,
+};
+
+struct amdgpu_iv_entry {
+ struct amdgpu_ih_ring *ih;
+ unsigned client_id;
+ unsigned src_id;
+ unsigned ring_id;
+ unsigned vmid;
+ unsigned vmid_src;
+ uint64_t timestamp;
+ unsigned timestamp_src;
+ unsigned pasid;
+ unsigned node_id;
+ unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
+ const uint32_t *iv_entry;
+};
+
+struct amdgpu_irq_src {
+ unsigned num_types;
+ atomic_t *enabled_types;
+ const struct amdgpu_irq_src_funcs *funcs;
+};
+
+struct amdgpu_irq_client {
+ struct amdgpu_irq_src **sources;
+};
+
+/* provided by interrupt generating IP blocks */
+struct amdgpu_irq_src_funcs {
+ int (*set)(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ unsigned type, enum amdgpu_interrupt_state state);
+
+ int (*process)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+};
+
+struct amdgpu_irq {
+ bool installed;
+ unsigned int irq;
+ spinlock_t lock;
+ /* interrupt sources */
+ struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX];
+
+ /* status, etc. */
+ bool msi_enabled; /* msi enabled */
+
+ /* interrupt rings */
+ struct amdgpu_ih_ring ih, ih1, ih2, ih_soft;
+ const struct amdgpu_ih_funcs *ih_funcs;
+ struct work_struct ih1_work, ih2_work, ih_soft_work;
+ struct amdgpu_irq_src self_irq;
+
+ /* gen irq stuff */
+ struct irq_domain *domain; /* GPU irq controller domain */
+ unsigned virq[AMDGPU_MAX_IRQ_SRC_ID];
+ uint32_t srbm_soft_reset;
+ u32 retry_cam_doorbell_index;
+ bool retry_cam_enabled;
+};
+
+enum interrupt_node_id_per_aid {
+ AID0_NODEID = 0,
+ XCD0_NODEID = 1,
+ XCD1_NODEID = 2,
+ AID1_NODEID = 4,
+ XCD2_NODEID = 5,
+ XCD3_NODEID = 6,
+ AID2_NODEID = 8,
+ XCD4_NODEID = 9,
+ XCD5_NODEID = 10,
+ AID3_NODEID = 12,
+ XCD6_NODEID = 13,
+ XCD7_NODEID = 14,
+ NODEID_MAX,
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
+void amdgpu_irq_disable_all(struct amdgpu_device *adev);
+
+int amdgpu_irq_init(struct amdgpu_device *adev);
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev);
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev);
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned client_id, unsigned src_id,
+ struct amdgpu_irq_src *source);
+void amdgpu_irq_dispatch(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
+void amdgpu_irq_delegate(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ unsigned int num_dw);
+int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
+
+int amdgpu_irq_add_domain(struct amdgpu_device *adev);
+void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
+unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
new file mode 100644
index 000000000..78476bc75
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+
+static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
+ struct amdgpu_task_info ti;
+ struct amdgpu_device *adev = ring->adev;
+ int idx;
+ int r;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
+ DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
+ __func__, s_job->sched->name);
+
+ /* Effectively the job is aborted as the device is gone */
+ return DRM_GPU_SCHED_STAT_ENODEV;
+ }
+
+ memset(&ti, 0, sizeof(struct amdgpu_task_info));
+ adev->job_hang = true;
+
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+ DRM_ERROR("ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
+ goto exit;
+ }
+
+ amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
+ DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+ DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
+ ti.process_name, ti.tgid, ti.task_name, ti.pid);
+
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
+ if (amdgpu_device_should_recover_gpu(ring->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
+ if (r)
+ DRM_ERROR("GPU Recovery Failed: %d\n", r);
+ } else {
+ drm_sched_suspend_timeout(&ring->sched);
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.tdr_debug = true;
+ }
+
+exit:
+ adev->job_hang = false;
+ drm_dev_exit(idx);
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job)
+{
+ if (num_ibs == 0)
+ return -EINVAL;
+
+ *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
+ if (!*job)
+ return -ENOMEM;
+
+ /*
+ * Initialize the scheduler to at least some ring so that we always
+ * have a pointer to adev.
+ */
+ (*job)->base.sched = &adev->rings[0]->sched;
+ (*job)->vm = vm;
+
+ amdgpu_sync_create(&(*job)->explicit_sync);
+ (*job)->generation = amdgpu_vm_generation(adev, vm);
+ (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
+
+ if (!entity)
+ return 0;
+
+ return drm_sched_job_init(&(*job)->base, entity, owner);
+}
+
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job)
+{
+ int r;
+
+ r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job);
+ if (r)
+ return r;
+
+ (*job)->num_ibs = 1;
+ r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
+ if (r) {
+ if (entity)
+ drm_sched_job_cleanup(&(*job)->base);
+ kfree(*job);
+ }
+
+ return r;
+}
+
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+ if (gds) {
+ job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+ }
+ if (gws) {
+ job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+ }
+ if (oa) {
+ job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ }
+}
+
+void amdgpu_job_free_resources(struct amdgpu_job *job)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ struct dma_fence *f;
+ unsigned i;
+
+ /* Check if any fences where initialized */
+ if (job->base.s_fence && job->base.s_fence->finished.ops)
+ f = &job->base.s_fence->finished;
+ else if (job->hw_fence.ops)
+ f = &job->hw_fence;
+ else
+ f = NULL;
+
+ for (i = 0; i < job->num_ibs; ++i)
+ amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+}
+
+static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
+{
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
+
+ drm_sched_job_cleanup(s_job);
+
+ amdgpu_sync_free(&job->explicit_sync);
+
+ /* only put the hw fence if has embedded fence */
+ if (!job->hw_fence.ops)
+ kfree(job);
+ else
+ dma_fence_put(&job->hw_fence);
+}
+
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader)
+{
+ struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+ WARN_ON(job->gang_submit);
+
+ /*
+ * Don't add a reference when we are the gang leader to avoid circle
+ * dependency.
+ */
+ if (job != leader)
+ dma_fence_get(fence);
+ job->gang_submit = fence;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+ if (job->base.entity)
+ drm_sched_job_cleanup(&job->base);
+
+ amdgpu_job_free_resources(job);
+ amdgpu_sync_free(&job->explicit_sync);
+ if (job->gang_submit != &job->base.s_fence->scheduled)
+ dma_fence_put(job->gang_submit);
+
+ if (!job->hw_fence.ops)
+ kfree(job);
+ else
+ dma_fence_put(&job->hw_fence);
+}
+
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
+{
+ struct dma_fence *f;
+
+ drm_sched_job_arm(&job->base);
+ f = dma_fence_get(&job->base.s_fence->finished);
+ amdgpu_job_free_resources(job);
+ drm_sched_entity_push_job(&job->base);
+
+ return f;
+}
+
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence)
+{
+ int r;
+
+ job->base.sched = &ring->sched;
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
+
+ if (r)
+ return r;
+
+ amdgpu_job_free(job);
+ return 0;
+}
+
+static struct dma_fence *
+amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
+ struct amdgpu_job *job = to_amdgpu_job(sched_job);
+ struct dma_fence *fence = NULL;
+ int r;
+
+ /* Ignore soft recovered fences here */
+ r = drm_sched_entity_error(s_entity);
+ if (r && r != -ENODATA)
+ goto error;
+
+ if (!fence && job->gang_submit)
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+
+ while (!fence && job->vm && !job->vmid) {
+ r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
+ if (r) {
+ DRM_ERROR("Error getting VM ID (%d)\n", r);
+ goto error;
+ }
+ }
+
+ return fence;
+
+error:
+ dma_fence_set_error(&job->base.s_fence->finished, r);
+ return NULL;
+}
+
+static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence = NULL, *finished;
+ struct amdgpu_job *job;
+ int r = 0;
+
+ job = to_amdgpu_job(sched_job);
+ finished = &job->base.s_fence->finished;
+
+ trace_amdgpu_sched_run_job(job);
+
+ /* Skip job if VRAM is lost and never resubmit gangs */
+ if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
+ (job->job_run_counter && job->gang_submit))
+ dma_fence_set_error(finished, -ECANCELED);
+
+ if (finished->error < 0) {
+ DRM_INFO("Skip scheduling IBs!\n");
+ } else {
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
+ &fence);
+ if (r)
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ }
+
+ job->job_run_counter++;
+ amdgpu_job_free_resources(job);
+
+ fence = r ? ERR_PTR(r) : fence;
+ return fence;
+}
+
+#define to_drm_sched_job(sched_job) \
+ container_of((sched_job), struct drm_sched_job, queue_node)
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct drm_sched_entity *s_entity = NULL;
+ int i;
+
+ /* Signal all jobs not yet scheduled */
+ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = &sched->sched_rq[i];
+ spin_lock(&rq->lock);
+ list_for_each_entry(s_entity, &rq->entities, list) {
+ while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_signal(&s_fence->scheduled);
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+ }
+ spin_unlock(&rq->lock);
+ }
+
+ /* Signal all jobs already scheduled to HW */
+ list_for_each_entry(s_job, &sched->pending_list, list) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+}
+
+const struct drm_sched_backend_ops amdgpu_sched_ops = {
+ .prepare_job = amdgpu_job_prepare_job,
+ .run_job = amdgpu_job_run,
+ .timedout_job = amdgpu_job_timedout,
+ .free_job = amdgpu_job_free_cb
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
new file mode 100644
index 000000000..a963a25dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_JOB_H__
+#define __AMDGPU_JOB_H__
+
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
+/* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
+/* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
+/* bit set means context switch occured */
+#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
+/* bit set means IB is preempted */
+#define AMDGPU_IB_PREEMPTED (1 << 3)
+
+#define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
+
+#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
+
+struct amdgpu_fence;
+enum amdgpu_ib_pool_type;
+
+struct amdgpu_job {
+ struct drm_sched_job base;
+ struct amdgpu_vm *vm;
+ struct amdgpu_sync explicit_sync;
+ struct dma_fence hw_fence;
+ struct dma_fence *gang_submit;
+ uint32_t preamble_status;
+ uint32_t preemption_status;
+ bool vm_needs_flush;
+ bool gds_switch_needed;
+ bool spm_update_needed;
+ uint64_t vm_pd_addr;
+ unsigned vmid;
+ unsigned pasid;
+ uint32_t gds_base, gds_size;
+ uint32_t gws_base, gws_size;
+ uint32_t oa_base, oa_size;
+ uint64_t generation;
+
+ /* user fence handling */
+ uint64_t uf_addr;
+ uint64_t uf_sequence;
+
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
+ /* job_run_counter >= 1 means a resubmit job */
+ uint32_t job_run_counter;
+
+ uint32_t num_ibs;
+ struct amdgpu_ib ibs[];
+};
+
+static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
+{
+ return to_amdgpu_ring(job->base.entity->rq->sched);
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
+void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader);
+void amdgpu_job_free(struct amdgpu_job *job);
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence);
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
new file mode 100644
index 000000000..2ff2897fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
+{
+ INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+ mutex_init(&adev->jpeg.jpeg_pg_lock);
+ atomic_set(&adev->jpeg.total_submission_cnt, 0);
+
+ return 0;
+}
+
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
+ }
+
+ mutex_destroy(&adev->jpeg.jpeg_pg_lock);
+
+ return 0;
+}
+
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev)
+{
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ return 0;
+}
+
+int amdgpu_jpeg_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, jpeg.idle_work.work);
+ unsigned int fences = 0;
+ unsigned int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
+ }
+
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_GATE);
+ else
+ schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ atomic_inc(&adev->jpeg.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_UNGATE);
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+}
+
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
+{
+ atomic_dec(&ring->adev->jpeg.total_submission_cnt);
+ schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* JPEG in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
+ /* Add a read register to make sure the write register is executed. */
+ RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
+ amdgpu_ring_write(ring, 0xABADCAFE);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xABADCAFE)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ } else {
+ r = 0;
+ }
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ }
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->jpeg.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+ return 0;
+}
+
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i) ||
+ !adev->jpeg.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_jpeg_ras *ras;
+
+ if (!adev->jpeg.ras)
+ return 0;
+
+ ras = adev->jpeg.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register jpeg ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "jpeg");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->jpeg.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
new file mode 100644
index 000000000..ffe47e9f5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_JPEG_H__
+#define __AMDGPU_JPEG_H__
+
+#include "amdgpu_ras.h"
+
+#define AMDGPU_MAX_JPEG_INSTANCES 4
+#define AMDGPU_MAX_JPEG_RINGS 8
+
+#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
+#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+
+struct amdgpu_jpeg_reg{
+ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
+};
+
+struct amdgpu_jpeg_inst {
+ struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
+ struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
+ struct amdgpu_jpeg_reg external;
+ uint8_t aid_id;
+};
+
+struct amdgpu_jpeg_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_jpeg {
+ uint8_t num_jpeg_inst;
+ struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ unsigned num_jpeg_rings;
+ struct amdgpu_jpeg_reg internal;
+ unsigned harvest_config;
+ struct delayed_work idle_work;
+ enum amd_powergating_state cur_state;
+ struct mutex jpeg_pg_lock;
+ atomic_t total_submission_cnt;
+ struct ras_common_if *ras_if;
+ struct amdgpu_jpeg_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+};
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev);
+int amdgpu_jpeg_resume(struct amdgpu_device *adev);
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
+
+#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
new file mode 100644
index 000000000..58dab4f73
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -0,0 +1,1756 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include "amdgpu.h"
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "atom.h"
+
+#include <linux/vga_switcheroo.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_display.h"
+#include "amdgpu_ras.h"
+#include "amd_pcie.h"
+
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+ int i;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev == adev) {
+ mgpu_info.gpu_ins[i] =
+ mgpu_info.gpu_ins[mgpu_info.num_gpu - 1];
+ mgpu_info.num_gpu--;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu--;
+ else
+ mgpu_info.num_dgpu--;
+ break;
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+}
+
+/**
+ * amdgpu_driver_unload_kms - Main unload function for KMS.
+ *
+ * @dev: drm dev pointer
+ *
+ * This is the main unload function for KMS (all asics).
+ * Returns 0 on success.
+ */
+void amdgpu_driver_unload_kms(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (adev == NULL)
+ return;
+
+ amdgpu_unregister_gpu_instance(adev);
+
+ if (adev->rmmio == NULL)
+ return;
+
+ if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD))
+ DRM_WARN("smart shift update failed\n");
+
+ amdgpu_acpi_fini(adev);
+ amdgpu_device_fini_hw(adev);
+}
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
+ DRM_ERROR("Cannot register more gpu instance\n");
+ mutex_unlock(&mgpu_info.mutex);
+ return;
+ }
+
+ gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]);
+ gpu_instance->adev = adev;
+ gpu_instance->mgpu_fan_enabled = 0;
+
+ mgpu_info.num_gpu++;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu++;
+ else
+ mgpu_info.num_dgpu++;
+
+ mutex_unlock(&mgpu_info.mutex);
+}
+
+/**
+ * amdgpu_driver_load_kms - Main load function for KMS.
+ *
+ * @adev: pointer to struct amdgpu_device
+ * @flags: device flags
+ *
+ * This is the main load function for KMS (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
+{
+ struct drm_device *dev;
+ int r, acpi_status;
+
+ dev = adev_to_drm(adev);
+
+ /* amdgpu_device_init should report only fatal error
+ * like memory allocation failure or iomapping failure,
+ * or memory manager initialization failure, it must
+ * properly initialize the GPU MC controller and permit
+ * VRAM allocation
+ */
+ r = amdgpu_device_init(adev, flags);
+ if (r) {
+ dev_err(dev->dev, "Fatal error during GPU init\n");
+ goto out;
+ }
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ if (amdgpu_device_supports_px(dev) &&
+ (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(dev) &&
+ (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else if (amdgpu_device_supports_baco(dev) &&
+ (amdgpu_runtime_pm != 0)) {
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* enable BACO as runpm mode if runpm=1 */
+ if (amdgpu_runtime_pm > 0)
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry)
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+
+ /* Call ACPI methods: require modeset init
+ * but failure is not fatal
+ */
+
+ acpi_status = amdgpu_acpi_init(adev);
+ if (acpi_status)
+ dev_dbg(dev->dev, "Error during ACPI methods call\n");
+
+ if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD))
+ DRM_WARN("smart shift update failed\n");
+
+out:
+ if (r)
+ amdgpu_driver_unload_kms(dev);
+
+ return r;
+}
+
+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+ struct drm_amdgpu_query_fw *query_fw,
+ struct amdgpu_device *adev)
+{
+ switch (query_fw->fw_type) {
+ case AMDGPU_INFO_FW_VCE:
+ fw_info->ver = adev->vce.fw_version;
+ fw_info->feature = adev->vce.fb_version;
+ break;
+ case AMDGPU_INFO_FW_UVD:
+ fw_info->ver = adev->uvd.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_VCN:
+ fw_info->ver = adev->vcn.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_GMC:
+ fw_info->ver = adev->gmc.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_GFX_ME:
+ fw_info->ver = adev->gfx.me_fw_version;
+ fw_info->feature = adev->gfx.me_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_PFP:
+ fw_info->ver = adev->gfx.pfp_fw_version;
+ fw_info->feature = adev->gfx.pfp_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_CE:
+ fw_info->ver = adev->gfx.ce_fw_version;
+ fw_info->feature = adev->gfx.ce_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC:
+ fw_info->ver = adev->gfx.rlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
+ fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+ fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+ fw_info->ver = adev->gfx.rlc_srls_fw_version;
+ fw_info->feature = adev->gfx.rlc_srls_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCP:
+ fw_info->ver = adev->gfx.rlcp_ucode_version;
+ fw_info->feature = adev->gfx.rlcp_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCV:
+ fw_info->ver = adev->gfx.rlcv_ucode_version;
+ fw_info->feature = adev->gfx.rlcv_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_MEC:
+ if (query_fw->index == 0) {
+ fw_info->ver = adev->gfx.mec_fw_version;
+ fw_info->feature = adev->gfx.mec_feature_version;
+ } else if (query_fw->index == 1) {
+ fw_info->ver = adev->gfx.mec2_fw_version;
+ fw_info->feature = adev->gfx.mec2_feature_version;
+ } else
+ return -EINVAL;
+ break;
+ case AMDGPU_INFO_FW_SMC:
+ fw_info->ver = adev->pm.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_TA:
+ switch (query_fw->index) {
+ case TA_FW_TYPE_PSP_XGMI:
+ fw_info->ver = adev->psp.xgmi_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.xgmi_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_RAS:
+ fw_info->ver = adev->psp.ras_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.ras_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_HDCP:
+ fw_info->ver = adev->psp.hdcp_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.hdcp_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_DTM:
+ fw_info->ver = adev->psp.dtm_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.dtm_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_RAP:
+ fw_info->ver = adev->psp.rap_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.rap_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_SECUREDISPLAY:
+ fw_info->ver = adev->psp.securedisplay_context.context.bin_desc.fw_version;
+ fw_info->feature =
+ adev->psp.securedisplay_context.context.bin_desc
+ .feature_version;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_FW_SDMA:
+ if (query_fw->index >= adev->sdma.num_instances)
+ return -EINVAL;
+ fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+ fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+ break;
+ case AMDGPU_INFO_FW_SOS:
+ fw_info->ver = adev->psp.sos.fw_version;
+ fw_info->feature = adev->psp.sos.feature_version;
+ break;
+ case AMDGPU_INFO_FW_ASD:
+ fw_info->ver = adev->psp.asd_context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.asd_context.bin_desc.feature_version;
+ break;
+ case AMDGPU_INFO_FW_DMCU:
+ fw_info->ver = adev->dm.dmcu_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_DMCUB:
+ fw_info->ver = adev->dm.dmcub_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_TOC:
+ fw_info->ver = adev->psp.toc.fw_version;
+ fw_info->feature = adev->psp.toc.feature_version;
+ break;
+ case AMDGPU_INFO_FW_CAP:
+ fw_info->ver = adev->psp.cap_fw_version;
+ fw_info->feature = adev->psp.cap_feature_version;
+ break;
+ case AMDGPU_INFO_FW_MES_KIQ:
+ fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_MES:
+ fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_IMU:
+ fw_info->ver = adev->gfx.imu_fw_version;
+ fw_info->feature = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_hw_ip *result)
+{
+ uint32_t ib_start_alignment = 0;
+ uint32_t ib_size_alignment = 0;
+ enum amd_ip_block_type type;
+ unsigned int num_rings = 0;
+ unsigned int i, j;
+
+ if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+ return -EINVAL;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ if (adev->gfx.gfx_ring[i].sched.ready)
+ ++num_rings;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ if (adev->gfx.compute_ring[i].sched.ready)
+ ++num_rings;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (adev->sdma.instance[i].ring.sched.ready)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ if (adev->uvd.inst[i].ring.sched.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ for (i = 0; i < adev->vce.num_rings; i++)
+ if (adev->vce.ring[i].sched.ready)
+ ++num_rings;
+ ib_start_alignment = 4;
+ ib_size_alignment = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->uvd.num_enc_rings; j++)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->vcn.inst[i].ring_dec.sched.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type &&
+ adev->ip_blocks[i].status.valid)
+ break;
+
+ if (i == adev->num_ip_blocks)
+ return 0;
+
+ num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
+ num_rings);
+
+ result->hw_ip_version_major = adev->ip_blocks[i].version->major;
+ result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
+ break;
+ default:
+ result->ip_discovery_version = 0;
+ break;
+ }
+ } else {
+ result->ip_discovery_version = 0;
+ }
+ result->capabilities_flags = 0;
+ result->available_rings = (1 << num_rings) - 1;
+ result->ib_start_alignment = ib_start_alignment;
+ result->ib_size_alignment = ib_size_alignment;
+ return 0;
+}
+
+/*
+ * Userspace get information ioctl
+ */
+/**
+ * amdgpu_info_ioctl - answer a device specific request.
+ *
+ * @dev: drm device pointer
+ * @data: request object
+ * @filp: drm filp
+ *
+ * This function is used to pass device specific parameters to the userspace
+ * drivers. Examples include: pci device id, pipeline parms, tiling params,
+ * etc. (all asics).
+ * Returns 0 on success, -EINVAL on failure.
+ */
+int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_info *info = data;
+ struct amdgpu_mode_info *minfo = &adev->mode_info;
+ void __user *out = (void __user *)(uintptr_t)info->return_pointer;
+ uint32_t size = info->return_size;
+ struct drm_crtc *crtc;
+ uint32_t ui32 = 0;
+ uint64_t ui64 = 0;
+ int i, found;
+ int ui32_size = sizeof(ui32);
+
+ if (!info->return_size || !info->return_pointer)
+ return -EINVAL;
+
+ switch (info->query) {
+ case AMDGPU_INFO_ACCEL_WORKING:
+ ui32 = adev->accel_working;
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_CRTC_FROM_ID:
+ for (i = 0, found = 0; i < adev->mode_info.num_crtc; i++) {
+ crtc = (struct drm_crtc *)minfo->crtcs[i];
+ if (crtc && crtc->base.id == info->mode_crtc.id) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ ui32 = amdgpu_crtc->crtc_id;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ DRM_DEBUG_KMS("unknown crtc id %d\n", info->mode_crtc.id);
+ return -EINVAL;
+ }
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_HW_IP_INFO: {
+ struct drm_amdgpu_info_hw_ip ip = {};
+ int ret;
+
+ ret = amdgpu_hw_ip_info(adev, info, &ip);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip)));
+ return ret ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_HW_IP_COUNT: {
+ enum amd_ip_block_type type;
+ uint32_t count = 0;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type &&
+ adev->ip_blocks[i].status.valid &&
+ count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+ count++;
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_TIMESTAMP:
+ ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_FW_VERSION: {
+ struct drm_amdgpu_info_firmware fw_info;
+ int ret;
+
+ /* We only support one instance of each IP block right now. */
+ if (info->query_fw.ip_instance != 0)
+ return -EINVAL;
+
+ ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+ if (ret)
+ return ret;
+
+ return copy_to_user(out, &fw_info,
+ min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_NUM_BYTES_MOVED:
+ ui64 = atomic64_read(&adev->num_bytes_moved);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_NUM_EVICTIONS:
+ ui64 = atomic64_read(&adev->num_evictions);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS:
+ ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_VRAM_USAGE:
+ ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_VIS_VRAM_USAGE:
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_GTT_USAGE:
+ ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_GDS_CONFIG: {
+ struct drm_amdgpu_info_gds gds_info;
+
+ memset(&gds_info, 0, sizeof(gds_info));
+ gds_info.compute_partition_size = adev->gds.gds_size;
+ gds_info.gds_total_size = adev->gds.gds_size;
+ gds_info.gws_per_compute_partition = adev->gds.gws_size;
+ gds_info.oa_per_compute_partition = adev->gds.oa_size;
+ return copy_to_user(out, &gds_info,
+ min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VRAM_GTT: {
+ struct drm_amdgpu_info_vram_gtt vram_gtt;
+
+ vram_gtt.vram_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ vram_gtt.vram_cpu_accessible_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ vram_gtt.vram_size);
+ vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
+ vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
+ return copy_to_user(out, &vram_gtt,
+ min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_MEMORY: {
+ struct drm_amdgpu_memory_info mem;
+ struct ttm_resource_manager *gtt_man =
+ &adev->mman.gtt_mgr.manager;
+ struct ttm_resource_manager *vram_man =
+ &adev->mman.vram_mgr.manager;
+
+ memset(&mem, 0, sizeof(mem));
+ mem.vram.total_heap_size = adev->gmc.real_vram_size;
+ mem.vram.usable_heap_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ mem.vram.heap_usage =
+ ttm_resource_manager_usage(vram_man);
+ mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
+
+ mem.cpu_accessible_vram.total_heap_size =
+ adev->gmc.visible_vram_size;
+ mem.cpu_accessible_vram.usable_heap_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ mem.vram.usable_heap_size);
+ mem.cpu_accessible_vram.heap_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ mem.cpu_accessible_vram.max_allocation =
+ mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
+
+ mem.gtt.total_heap_size = gtt_man->size;
+ mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
+ atomic64_read(&adev->gart_pin_size);
+ mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
+ mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
+
+ return copy_to_user(out, &mem,
+ min((size_t)size, sizeof(mem)))
+ ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_READ_MMR_REG: {
+ unsigned int n, alloc_size;
+ uint32_t *regs;
+ unsigned int se_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SE_INDEX_MASK;
+ unsigned int sh_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SH_INDEX_MASK;
+
+ /* set full masks if the userspace set all bits
+ * in the bitfields
+ */
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+ se_num = 0xffffffff;
+ else if (se_num >= AMDGPU_GFX_MAX_SE)
+ return -EINVAL;
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+ sh_num = 0xffffffff;
+ else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
+ return -EINVAL;
+
+ if (info->read_mmr_reg.count > 128)
+ return -EINVAL;
+
+ regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
+ if (!regs)
+ return -ENOMEM;
+ alloc_size = info->read_mmr_reg.count * sizeof(*regs);
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
+ if (amdgpu_asic_read_register(adev, se_num, sh_num,
+ info->read_mmr_reg.dword_offset + i,
+ &regs[i])) {
+ DRM_DEBUG_KMS("unallowed offset %#x\n",
+ info->read_mmr_reg.dword_offset + i);
+ kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
+ return -EFAULT;
+ }
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+ n = copy_to_user(out, regs, min(size, alloc_size));
+ kfree(regs);
+ return n ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_DEV_INFO: {
+ struct drm_amdgpu_info_device *dev_info;
+ uint64_t vm_size;
+ uint32_t pcie_gen_mask;
+ int ret;
+
+ dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
+ if (!dev_info)
+ return -ENOMEM;
+
+ dev_info->device_id = adev->pdev->device;
+ dev_info->chip_rev = adev->rev_id;
+ dev_info->external_rev = adev->external_rev_id;
+ dev_info->pci_rev = adev->pdev->revision;
+ dev_info->family = adev->family;
+ dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+ dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+ /* return all clocks in KHz */
+ dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+ if (adev->pm.dpm_enabled) {
+ dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
+ dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
+ dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 10;
+ dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 10;
+ } else {
+ dev_info->max_engine_clock =
+ dev_info->min_engine_clock =
+ adev->clock.default_sclk * 10;
+ dev_info->max_memory_clock =
+ dev_info->min_memory_clock =
+ adev->clock.default_mclk * 10;
+ }
+ dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+ dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines;
+ dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+ dev_info->ids_flags = 0;
+ if (adev->flags & AMD_IS_APU)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
+ if (adev->gfx.mcbp)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+ if (amdgpu_is_tmz(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+ if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+
+ vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_SIZE;
+
+ /* Older VCE FW versions are buggy and can handle only 40bits */
+ if (adev->vce.fw_version &&
+ adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+ vm_size = min(vm_size, 1ULL << 40);
+
+ dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
+ dev_info->virtual_address_max =
+ min(vm_size, AMDGPU_GMC_HOLE_START);
+
+ if (vm_size > AMDGPU_GMC_HOLE_START) {
+ dev_info->high_va_offset = AMDGPU_GMC_HOLE_END;
+ dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
+ }
+ dev_info->virtual_address_alignment = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+ dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
+ dev_info->gart_page_size = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+ dev_info->cu_active_number = adev->gfx.cu_info.number;
+ dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
+ dev_info->ce_ram_size = adev->gfx.ce_ram_size;
+ memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
+ sizeof(adev->gfx.cu_info.ao_cu_bitmap));
+ memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
+ sizeof(dev_info->cu_bitmap));
+ dev_info->vram_type = adev->gmc.vram_type;
+ dev_info->vram_bit_width = adev->gmc.vram_width;
+ dev_info->vce_harvest_config = adev->vce.harvest_config;
+ dev_info->gc_double_offchip_lds_buf =
+ adev->gfx.config.double_offchip_lds_buf;
+ dev_info->wave_front_size = adev->gfx.cu_info.wave_front_size;
+ dev_info->num_shader_visible_vgprs = adev->gfx.config.max_gprs;
+ dev_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+ dev_info->num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;
+ dev_info->gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;
+ dev_info->gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
+ dev_info->max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
+
+ if (adev->family >= AMDGPU_FAMILY_NV)
+ dev_info->pa_sc_tile_steering_override =
+ adev->gfx.config.pa_sc_tile_steering_override;
+
+ dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+
+ /* Combine the chip gen mask with the platform (CPU/mobo) mask. */
+ pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16);
+ dev_info->pcie_gen = fls(pcie_gen_mask);
+ dev_info->pcie_num_lanes =
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+
+ dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
+ dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
+ dev_info->sqc_data_cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ dev_info->sqc_inst_cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ dev_info->gl1c_cache_size = adev->gfx.config.gc_gl1c_size_per_instance *
+ adev->gfx.config.gc_gl1c_per_sa;
+ dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ dev_info->mall_size = adev->gmc.mall_size;
+
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+ if (!ret) {
+ dev_info->shadow_size = shadow_info.shadow_size;
+ dev_info->shadow_alignment = shadow_info.shadow_alignment;
+ dev_info->csa_size = shadow_info.csa_size;
+ dev_info->csa_alignment = shadow_info.csa_alignment;
+ }
+ }
+
+ ret = copy_to_user(out, dev_info,
+ min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
+ kfree(dev_info);
+ return ret;
+ }
+ case AMDGPU_INFO_VCE_CLOCK_TABLE: {
+ unsigned int i;
+ struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
+ struct amd_vce_state *vce_state;
+
+ for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) {
+ vce_state = amdgpu_dpm_get_vce_clock_state(adev, i);
+ if (vce_state) {
+ vce_clk_table.entries[i].sclk = vce_state->sclk;
+ vce_clk_table.entries[i].mclk = vce_state->mclk;
+ vce_clk_table.entries[i].eclk = vce_state->evclk;
+ vce_clk_table.num_valid_entries++;
+ }
+ }
+
+ return copy_to_user(out, &vce_clk_table,
+ min((size_t)size, sizeof(vce_clk_table))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VBIOS: {
+ uint32_t bios_size = adev->bios_size;
+
+ switch (info->vbios_info.type) {
+ case AMDGPU_INFO_VBIOS_SIZE:
+ return copy_to_user(out, &bios_size,
+ min((size_t)size, sizeof(bios_size)))
+ ? -EFAULT : 0;
+ case AMDGPU_INFO_VBIOS_IMAGE: {
+ uint8_t *bios;
+ uint32_t bios_offset = info->vbios_info.offset;
+
+ if (bios_offset >= bios_size)
+ return -EINVAL;
+
+ bios = adev->bios + bios_offset;
+ return copy_to_user(out, bios,
+ min((size_t)size, (size_t)(bios_size - bios_offset)))
+ ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VBIOS_INFO: {
+ struct drm_amdgpu_info_vbios vbios_info = {};
+ struct atom_context *atom_context;
+
+ atom_context = adev->mode_info.atom_context;
+ if (atom_context) {
+ memcpy(vbios_info.name, atom_context->name,
+ sizeof(atom_context->name));
+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+ sizeof(atom_context->vbios_pn));
+ vbios_info.version = atom_context->version;
+ memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+ sizeof(atom_context->vbios_ver_str));
+ memcpy(vbios_info.date, atom_context->date,
+ sizeof(atom_context->date));
+ }
+
+ return copy_to_user(out, &vbios_info,
+ min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
+ }
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->vbios_info.type);
+ return -EINVAL;
+ }
+ }
+ case AMDGPU_INFO_NUM_HANDLES: {
+ struct drm_amdgpu_info_num_handles handle;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_UVD:
+ /* Starting Polaris, we support unlimited UVD handles */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ handle.uvd_max_handles = adev->uvd.max_handles;
+ handle.uvd_used_handles = amdgpu_uvd_used_handles(adev);
+
+ return copy_to_user(out, &handle,
+ min((size_t)size, sizeof(handle))) ? -EFAULT : 0;
+ } else {
+ return -ENODATA;
+ }
+
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ case AMDGPU_INFO_SENSOR: {
+ if (!adev->pm.dpm_enabled)
+ return -ENOENT;
+
+ switch (info->sensor_info.type) {
+ case AMDGPU_INFO_SENSOR_GFX_SCLK:
+ /* get sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GFX_MCLK:
+ /* get mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_TEMP:
+ /* get temperature in millidegrees C */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_TEMP,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_LOAD:
+ /* get GPU load */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_LOAD,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
+ /* get average GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_AVG_POWER,
+ (void *)&ui32, &ui32_size)) {
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_VDDNB:
+ /* get VDDNB in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDNB,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_VDDGFX:
+ /* get VDDGFX in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDGFX,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+ /* get stable pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+ /* get stable pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK:
+ /* get peak pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK:
+ /* get peak pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->sensor_info.type);
+ return -EINVAL;
+ }
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VRAM_LOST_COUNTER:
+ ui32 = atomic_read(&adev->vram_lost_counter);
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_RAS_ENABLED_FEATURES: {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ uint64_t ras_mask;
+
+ if (!ras)
+ return -EINVAL;
+ ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
+
+ return copy_to_user(out, &ras_mask,
+ min_t(u64, size, sizeof(ras_mask))) ?
+ -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VIDEO_CAPS: {
+ const struct amdgpu_video_codecs *codecs;
+ struct drm_amdgpu_info_video_caps *caps;
+ int r;
+
+ if (!adev->asic_funcs->query_video_codecs)
+ return -EINVAL;
+
+ switch (info->video_cap.type) {
+ case AMDGPU_INFO_VIDEO_CAPS_DECODE:
+ r = amdgpu_asic_query_video_codecs(adev, false, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ case AMDGPU_INFO_VIDEO_CAPS_ENCODE:
+ r = amdgpu_asic_query_video_codecs(adev, true, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->video_cap.type);
+ return -EINVAL;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ for (i = 0; i < codecs->codec_count; i++) {
+ int idx = codecs->codec_array[i].codec_type;
+
+ switch (idx) {
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1:
+ caps->codec_info[idx].valid = 1;
+ caps->codec_info[idx].max_width =
+ codecs->codec_array[i].max_width;
+ caps->codec_info[idx].max_height =
+ codecs->codec_array[i].max_height;
+ caps->codec_info[idx].max_pixels_per_frame =
+ codecs->codec_array[i].max_pixels_per_frame;
+ caps->codec_info[idx].max_level =
+ codecs->codec_array[i].max_level;
+ break;
+ default:
+ break;
+ }
+ }
+ r = copy_to_user(out, caps,
+ min((size_t)size, sizeof(*caps))) ? -EFAULT : 0;
+ kfree(caps);
+ return r;
+ }
+ case AMDGPU_INFO_MAX_IBS: {
+ uint32_t max_ibs[AMDGPU_HW_IP_NUM];
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ max_ibs[i] = amdgpu_ring_max_ibs(i);
+
+ return copy_to_user(out, max_ibs,
+ min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
+ }
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n", info->query);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+
+/*
+ * Outdated mess for old drm with Xorg being in charge (void function now).
+ */
+/**
+ * amdgpu_driver_lastclose_kms - drm callback for last close
+ *
+ * @dev: drm dev pointer
+ *
+ * Switch vga_switcheroo state after last close (all asics).
+ */
+void amdgpu_driver_lastclose_kms(struct drm_device *dev)
+{
+ drm_fb_helper_lastclose(dev);
+ vga_switcheroo_process_delayed_switch();
+}
+
+/**
+ * amdgpu_driver_open_kms - drm callback for open
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device open, init vm on cayman+ (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv;
+ int r, pasid;
+
+ /* Ensure IB tests are run on ring */
+ flush_delayed_work(&adev->delayed_init_work);
+
+
+ if (amdgpu_ras_intr_triggered()) {
+ DRM_ERROR("RAS Intr triggered, device disabled!!");
+ return -EHWPOISON;
+ }
+
+ file_priv->driver_priv = NULL;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ goto pm_put;
+
+ fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+ if (unlikely(!fpriv)) {
+ r = -ENOMEM;
+ goto out_suspend;
+ }
+
+ pasid = amdgpu_pasid_alloc(16);
+ if (pasid < 0) {
+ dev_warn(adev->dev, "No more PASIDs available!");
+ pasid = 0;
+ }
+
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+ if (r)
+ goto error_pasid;
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
+ if (r)
+ goto error_pasid;
+
+ r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
+ if (r)
+ goto error_vm;
+
+ fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
+ if (!fpriv->prt_va) {
+ r = -ENOMEM;
+ goto error_vm;
+ }
+
+ if (adev->gfx.mcbp) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
+ if (r)
+ goto error_vm;
+ }
+
+ mutex_init(&fpriv->bo_list_lock);
+ idr_init_base(&fpriv->bo_list_handles, 1);
+
+ amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
+
+ file_priv->driver_priv = fpriv;
+ goto out_suspend;
+
+error_vm:
+ amdgpu_vm_fini(adev, &fpriv->vm);
+
+error_pasid:
+ if (pasid) {
+ amdgpu_pasid_free(pasid);
+ amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
+ }
+
+ kfree(fpriv);
+
+out_suspend:
+ pm_runtime_mark_last_busy(dev->dev);
+pm_put:
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_driver_postclose_kms - drm callback for post close
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device post close, tear down vm on cayman+ (all asics).
+ */
+void amdgpu_driver_postclose_kms(struct drm_device *dev,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_bo_list *list;
+ struct amdgpu_bo *pd;
+ u32 pasid;
+ int handle;
+
+ if (!fpriv)
+ return;
+
+ pm_runtime_get_sync(dev->dev);
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
+ amdgpu_uvd_free_handles(adev, file_priv);
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
+ amdgpu_vce_free_handles(adev, file_priv);
+
+ if (fpriv->csa_va) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ fpriv->csa_va, csa_addr));
+ fpriv->csa_va = NULL;
+ }
+
+ pasid = fpriv->vm.pasid;
+ pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+ if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+ amdgpu_vm_bo_del(adev, fpriv->prt_va);
+ amdgpu_bo_unreserve(pd);
+ }
+
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+ amdgpu_vm_fini(adev, &fpriv->vm);
+
+ if (pasid)
+ amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
+ amdgpu_bo_unref(&pd);
+
+ idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
+ amdgpu_bo_list_put(list);
+
+ idr_destroy(&fpriv->bo_list_handles);
+ mutex_destroy(&fpriv->bo_list_lock);
+
+ kfree(fpriv);
+ file_priv->driver_priv = NULL;
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+}
+
+
+void amdgpu_driver_release_kms(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ amdgpu_device_fini_sw(adev);
+ pci_set_drvdata(adev->pdev, NULL);
+}
+
+/*
+ * VBlank related functions.
+ */
+/**
+ * amdgpu_get_vblank_counter_kms - get frame count
+ *
+ * @crtc: crtc to get the frame count from
+ *
+ * Gets the frame count on the requested crtc (all asics).
+ * Returns frame count on success, -EINVAL on failure.
+ */
+u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int vpos, hpos, stat;
+ u32 count;
+
+ if (pipe >= adev->mode_info.num_crtc) {
+ DRM_ERROR("Invalid crtc %u\n", pipe);
+ return -EINVAL;
+ }
+
+ /* The hw increments its frame counter at start of vsync, not at start
+ * of vblank, as is required by DRM core vblank counter handling.
+ * Cook the hw count here to make it appear to the caller as if it
+ * incremented at start of vblank. We measure distance to start of
+ * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+ * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+ * result by 1 to give the proper appearance to caller.
+ */
+ if (adev->mode_info.crtcs[pipe]) {
+ /* Repeat readout if needed to provide stable result if
+ * we cross start of vsync during the queries.
+ */
+ do {
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ /* Ask amdgpu_display_get_crtc_scanoutpos to return
+ * vpos as distance to start of vblank, instead of
+ * regular vertical scanout pos.
+ */
+ stat = amdgpu_display_get_crtc_scanoutpos(
+ dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &adev->mode_info.crtcs[pipe]->base.hwmode);
+ } while (count != amdgpu_display_vblank_get_counter(adev, pipe));
+
+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+ } else {
+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+ pipe, vpos);
+
+ /* Bump counter if we are at >= leading edge of vblank,
+ * but before vsync where vpos would turn negative and
+ * the hw counter really increments.
+ */
+ if (vpos >= 0)
+ count++;
+ }
+ } else {
+ /* Fallback to use value as is. */
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_enable_vblank_kms - enable vblank interrupt
+ *
+ * @crtc: crtc to enable vblank interrupt for
+ *
+ * Enable the interrupt on the requested crtc (all asics).
+ * Returns 0 on success, -EINVAL on failure.
+ */
+int amdgpu_enable_vblank_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
+
+ return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
+}
+
+/**
+ * amdgpu_disable_vblank_kms - disable vblank interrupt
+ *
+ * @crtc: crtc to disable vblank interrupt for
+ *
+ * Disable the interrupt on the requested crtc (all asics).
+ */
+void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
+
+ amdgpu_irq_put(adev, &adev->crtc_irq, idx);
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_amdgpu_info_firmware fw_info;
+ struct drm_amdgpu_query_fw query_fw;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ int ret, i;
+
+ static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
+#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
+ TA_FW_NAME(XGMI),
+ TA_FW_NAME(RAS),
+ TA_FW_NAME(HDCP),
+ TA_FW_NAME(DTM),
+ TA_FW_NAME(RAP),
+ TA_FW_NAME(SECUREDISPLAY),
+#undef TA_FW_NAME
+ };
+
+ /* VCE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* UVD */
+ query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* GMC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* ME */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* PFP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* CE */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST CNTL */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST GPM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST SRM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCV */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MEC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MEC2 */
+ if (adev->gfx.mec2_fw) {
+ query_fw.index = 1;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* IMU */
+ query_fw.fw_type = AMDGPU_INFO_FW_IMU;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* PSP SOS */
+ query_fw.fw_type = AMDGPU_INFO_FW_SOS;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+
+ /* PSP ASD */
+ query_fw.fw_type = AMDGPU_INFO_FW_ASD;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ query_fw.fw_type = AMDGPU_INFO_FW_TA;
+ for (i = TA_FW_TYPE_PSP_XGMI; i < TA_FW_TYPE_MAX_INDEX; i++) {
+ query_fw.index = i;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ continue;
+
+ seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
+ ta_fw_name[i], fw_info.feature, fw_info.ver);
+ }
+
+ /* SMC */
+ query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ smu_program = (fw_info.ver >> 24) & 0xff;
+ smu_major = (fw_info.ver >> 16) & 0xff;
+ smu_minor = (fw_info.ver >> 8) & 0xff;
+ smu_debug = (fw_info.ver >> 0) & 0xff;
+ seq_printf(m, "SMC feature version: %u, program: %d, firmware version: 0x%08x (%d.%d.%d)\n",
+ fw_info.feature, smu_program, fw_info.ver, smu_major, smu_minor, smu_debug);
+
+ /* SDMA */
+ query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ query_fw.index = i;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, fw_info.feature, fw_info.ver);
+ }
+
+ /* VCN */
+ query_fw.fw_type = AMDGPU_INFO_FW_VCN;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* DMCU */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* DMCUB */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCUB;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* TOC */
+ query_fw.fw_type = AMDGPU_INFO_FW_TOC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* CAP */
+ if (adev->psp.cap_fw) {
+ query_fw.fw_type = AMDGPU_INFO_FW_CAP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CAP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* MES_KIQ */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MES */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_firmware_info);
+
+#endif
+
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_firmware_info", 0444, root,
+ adev, &amdgpu_debugfs_firmware_info_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
new file mode 100644
index 000000000..4d1d4994e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_lsdma.h"
+
+#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL
+
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev,
+ uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask)
+{
+ uint32_t val;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size);
+ if (ret)
+ return ret;
+ src_addr += current_copy_size;
+ dst_addr += current_copy_size;
+ mem_size -= current_copy_size;
+ }
+
+ return 0;
+}
+
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size);
+ if (ret)
+ return ret;
+ dst_addr += current_fill_size;
+ mem_size -= current_fill_size;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
new file mode 100644
index 000000000..c61ba58c5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_LSDMA_H__
+#define __AMDGPU_LSDMA_H__
+
+struct amdgpu_lsdma {
+ const struct amdgpu_lsdma_funcs *funcs;
+};
+
+struct amdgpu_lsdma_funcs {
+ int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t size);
+ int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t size);
+ void (*update_memory_power_gating)(struct amdgpu_device *adev, bool enable);
+};
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t mem_size);
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t mem_size);
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
new file mode 100644
index 000000000..8d9ff9e15
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+#include "amdgpu_mca.h"
+
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr)
+{
+ WREG64_PCIE(mc_status_addr, 0x0ULL);
+}
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
+ amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
+
+ amdgpu_mca_reset_error_count(adev, mc_status_addr);
+}
+
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp0.ras)
+ return 0;
+
+ ras = adev->mca.mp0.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp1.ras)
+ return 0;
+
+ ras = adev->mca.mp1.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mpio.ras)
+ return 0;
+
+ ras = adev->mca.mpio.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
new file mode 100644
index 000000000..997a073e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MCA_H__
+#define __AMDGPU_MCA_H__
+
+struct amdgpu_mca_ras_block {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_mca_ras {
+ struct ras_common_if *ras_if;
+ struct amdgpu_mca_ras_block *ras;
+};
+
+struct amdgpu_mca {
+ struct amdgpu_mca_ras mp0;
+ struct amdgpu_mca_ras mp1;
+ struct amdgpu_mca_ras mpio;
+};
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr);
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status);
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
new file mode 100644
index 000000000..6aa750523
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -0,0 +1,1478 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_mes.h"
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "amdgpu_mes_ctx.h"
+
+#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define AMDGPU_ONE_DOORBELL_SIZE 8
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
+{
+ return roundup(AMDGPU_ONE_DOORBELL_SIZE *
+ AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+}
+
+static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
+ struct amdgpu_mes_process *process,
+ int ip_type, uint64_t *doorbell_index)
+{
+ unsigned int offset, found;
+ struct amdgpu_mes *mes = &adev->mes;
+
+ if (ip_type == AMDGPU_RING_TYPE_SDMA)
+ offset = adev->doorbell_index.sdma_engine[0];
+ else
+ offset = 0;
+
+ found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
+ if (found >= mes->num_mes_dbs) {
+ DRM_WARN("No doorbell available\n");
+ return -ENOSPC;
+ }
+
+ set_bit(found, mes->doorbell_bitmap);
+
+ /* Get the absolute doorbell index on BAR */
+ *doorbell_index = mes->db_start_dw_offset + found * 2;
+ return 0;
+}
+
+static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
+ struct amdgpu_mes_process *process,
+ uint32_t doorbell_index)
+{
+ unsigned int old, rel_index;
+ struct amdgpu_mes *mes = &adev->mes;
+
+ /* Find the relative index of the doorbell in this object */
+ rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
+ old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
+ WARN_ON(!old);
+}
+
+static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_mes *mes = &adev->mes;
+
+ /* Bitmap for dynamic allocation of kernel doorbells */
+ mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
+ if (!mes->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
+ return -ENOMEM;
+ }
+
+ mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
+ for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
+ adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
+ set_bit(i, mes->doorbell_bitmap);
+ }
+
+ return 0;
+}
+
+static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
+{
+ bitmap_free(adev->mes.doorbell_bitmap);
+}
+
+int amdgpu_mes_init(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ adev->mes.adev = adev;
+
+ idr_init(&adev->mes.pasid_idr);
+ idr_init(&adev->mes.gang_id_idr);
+ idr_init(&adev->mes.queue_id_idr);
+ ida_init(&adev->mes.doorbell_ida);
+ spin_lock_init(&adev->mes.queue_id_lock);
+ spin_lock_init(&adev->mes.ring_lock);
+ mutex_init(&adev->mes.mutex_hidden);
+
+ adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ adev->mes.vmid_mask_mmhub = 0xffffff00;
+ adev->mes.vmid_mask_gfxhub = 0xffffff00;
+
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
+ /* use only 1st MEC pipes */
+ if (i >= 4)
+ continue;
+ adev->mes.compute_hqd_mask[i] = 0xc;
+ }
+
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
+
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ /* zero sdma_hqd_mask for non-existent engine */
+ else if (adev->sdma.num_instances == 1)
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
+ else
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
+
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ goto error_ids;
+ }
+ adev->mes.sch_ctx_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
+ adev->mes.sch_ctx_ptr =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
+
+ r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
+ if (r) {
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n", r);
+ goto error_ids;
+ }
+ adev->mes.query_status_fence_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
+ adev->mes.query_status_fence_ptr =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
+
+ r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
+ if (r) {
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ dev_err(adev->dev,
+ "(%d) read_val_offs alloc failed\n", r);
+ goto error_ids;
+ }
+ adev->mes.read_val_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
+ adev->mes.read_val_ptr =
+ (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
+
+ r = amdgpu_mes_doorbell_init(adev);
+ if (r)
+ goto error;
+
+ return 0;
+
+error:
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+error_ids:
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+ return r;
+}
+
+void amdgpu_mes_fini(struct amdgpu_device *adev)
+{
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+ amdgpu_mes_doorbell_free(adev);
+
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+}
+
+int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
+ struct amdgpu_vm *vm)
+{
+ struct amdgpu_mes_process *process;
+ int r;
+
+ /* allocate the mes process buffer */
+ process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
+ if (!process) {
+ DRM_ERROR("no more memory to create mes process\n");
+ return -ENOMEM;
+ }
+
+ /* allocate the process context bo and map it */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &process->proc_ctx_bo,
+ &process->proc_ctx_gpu_addr,
+ &process->proc_ctx_cpu_ptr);
+ if (r) {
+ DRM_ERROR("failed to allocate process context bo\n");
+ goto clean_up_memory;
+ }
+ memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* add the mes process to idr list */
+ r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to lock pasid=%d\n", pasid);
+ goto clean_up_ctx;
+ }
+
+ INIT_LIST_HEAD(&process->gang_list);
+ process->vm = vm;
+ process->pasid = pasid;
+ process->process_quantum = adev->mes.default_process_quantum;
+ process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+
+ amdgpu_mes_unlock(&adev->mes);
+ return 0;
+
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&process->proc_ctx_bo,
+ &process->proc_ctx_gpu_addr,
+ &process->proc_ctx_cpu_ptr);
+clean_up_memory:
+ kfree(process);
+ return r;
+}
+
+void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
+{
+ struct amdgpu_mes_process *process;
+ struct amdgpu_mes_gang *gang, *tmp1;
+ struct amdgpu_mes_queue *queue, *tmp2;
+ struct mes_remove_queue_input queue_input;
+ unsigned long flags;
+ int r;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_WARN("pasid %d doesn't exist\n", pasid);
+ amdgpu_mes_unlock(&adev->mes);
+ return;
+ }
+
+ /* Remove all queues from hardware */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
+ idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+
+ queue_input.doorbell_offset = queue->doorbell_off;
+ queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
+
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes,
+ &queue_input);
+ if (r)
+ DRM_WARN("failed to remove hardware queue\n");
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ }
+
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
+
+ /* free all memory allocated by the process */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ /* free all queues in the gang */
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ amdgpu_mes_queue_free_mqd(queue);
+ list_del(&queue->list);
+ kfree(queue);
+ }
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+ list_del(&gang->list);
+ kfree(gang);
+
+ }
+ amdgpu_bo_free_kernel(&process->proc_ctx_bo,
+ &process->proc_ctx_gpu_addr,
+ &process->proc_ctx_cpu_ptr);
+ kfree(process);
+}
+
+int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
+ struct amdgpu_mes_gang_properties *gprops,
+ int *gang_id)
+{
+ struct amdgpu_mes_process *process;
+ struct amdgpu_mes_gang *gang;
+ int r;
+
+ /* allocate the mes gang buffer */
+ gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
+ if (!gang) {
+ return -ENOMEM;
+ }
+
+ /* allocate the gang context bo and map it to cpu space */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+ if (r) {
+ DRM_ERROR("failed to allocate process context bo\n");
+ goto clean_up_mem;
+ }
+ memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_ERROR("pasid %d doesn't exist\n", pasid);
+ r = -EINVAL;
+ goto clean_up_ctx;
+ }
+
+ /* add the mes gang to idr list */
+ r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to allocate idr for gang\n");
+ goto clean_up_ctx;
+ }
+
+ gang->gang_id = r;
+ *gang_id = r;
+
+ INIT_LIST_HEAD(&gang->queue_list);
+ gang->process = process;
+ gang->priority = gprops->priority;
+ gang->gang_quantum = gprops->gang_quantum ?
+ gprops->gang_quantum : adev->mes.default_gang_quantum;
+ gang->global_priority_level = gprops->global_priority_level;
+ gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
+ list_add_tail(&gang->list, &process->gang_list);
+
+ amdgpu_mes_unlock(&adev->mes);
+ return 0;
+
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
+ kfree(gang);
+ return r;
+}
+
+int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
+{
+ struct amdgpu_mes_gang *gang;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ amdgpu_mes_unlock(&adev->mes);
+ return -EINVAL;
+ }
+
+ if (!list_empty(&gang->queue_list)) {
+ DRM_ERROR("queue list is not empty\n");
+ amdgpu_mes_unlock(&adev->mes);
+ return -EBUSY;
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ list_del(&gang->list);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+
+ kfree(gang);
+
+ return 0;
+}
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev)
+{
+ struct idr *idp;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_mes_gang *gang;
+ struct mes_suspend_gang_input input;
+ int r, pasid;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ idp = &adev->mes.pasid_idr;
+
+ idr_for_each_entry(idp, process, pasid) {
+ list_for_each_entry(gang, &process->gang_list, list) {
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
+ if (r)
+ DRM_ERROR("failed to suspend pasid %d gangid %d",
+ pasid, gang->gang_id);
+ }
+ }
+
+ amdgpu_mes_unlock(&adev->mes);
+ return 0;
+}
+
+int amdgpu_mes_resume(struct amdgpu_device *adev)
+{
+ struct idr *idp;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_mes_gang *gang;
+ struct mes_resume_gang_input input;
+ int r, pasid;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ idp = &adev->mes.pasid_idr;
+
+ idr_for_each_entry(idp, process, pasid) {
+ list_for_each_entry(gang, &process->gang_list, list) {
+ r = adev->mes.funcs->resume_gang(&adev->mes, &input);
+ if (r)
+ DRM_ERROR("failed to resume pasid %d gangid %d",
+ pasid, gang->gang_id);
+ }
+ }
+
+ amdgpu_mes_unlock(&adev->mes);
+ return 0;
+}
+
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ u32 mqd_size = mqd_mgr->mqd_size;
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &q->mqd_obj,
+ &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
+ return r;
+ }
+ memset(q->mqd_cpu_ptr, 0, mqd_size);
+
+ r = amdgpu_bo_reserve(q->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto clean_up;
+
+ return 0;
+
+clean_up:
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+ return r;
+}
+
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+
+ mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
+ mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
+ mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
+ mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
+ mqd_prop.queue_size = p->queue_size;
+ mqd_prop.use_doorbell = true;
+ mqd_prop.doorbell_index = p->doorbell_off;
+ mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
+ mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
+ mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
+ mqd_prop.hqd_active = false;
+
+ if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+ p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
+ }
+
+ mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
+
+ if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+ p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ amdgpu_bo_unreserve(q->mqd_obj);
+}
+
+int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
+ struct amdgpu_mes_queue_properties *qprops,
+ int *queue_id)
+{
+ struct amdgpu_mes_queue *queue;
+ struct amdgpu_mes_gang *gang;
+ struct mes_add_queue_input queue_input;
+ unsigned long flags;
+ int r;
+
+ memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
+
+ /* allocate the mes queue buffer */
+ queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
+ if (!queue) {
+ DRM_ERROR("Failed to allocate memory for queue\n");
+ return -ENOMEM;
+ }
+
+ /* Allocate the queue mqd */
+ r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+ if (r)
+ goto clean_up_memory;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ r = -EINVAL;
+ goto clean_up_mqd;
+ }
+
+ /* add the mes gang to idr list */
+ spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
+ r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
+ GFP_ATOMIC);
+ if (r < 0) {
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+ goto clean_up_mqd;
+ }
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+ *queue_id = queue->queue_id = r;
+
+ /* allocate a doorbell index for the queue */
+ r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
+ qprops->queue_type,
+ &qprops->doorbell_off);
+ if (r)
+ goto clean_up_queue_id;
+
+ /* initialize the queue mqd */
+ amdgpu_mes_queue_init_mqd(adev, queue, qprops);
+
+ /* add hw queue to mes */
+ queue_input.process_id = gang->process->pasid;
+
+ queue_input.page_table_base_addr =
+ adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+ adev->gmc.vram_start;
+
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end =
+ (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
+ queue_input.process_quantum = gang->process->process_quantum;
+ queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
+ queue_input.gang_quantum = gang->gang_quantum;
+ queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
+ queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
+ queue_input.gang_global_priority_level = gang->global_priority_level;
+ queue_input.doorbell_offset = qprops->doorbell_off;
+ queue_input.mqd_addr = queue->mqd_gpu_addr;
+ queue_input.wptr_addr = qprops->wptr_gpu_addr;
+ queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
+ queue_input.queue_type = qprops->queue_type;
+ queue_input.paging = qprops->paging;
+ queue_input.is_kfd_process = 0;
+
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ if (r) {
+ DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
+ qprops->doorbell_off);
+ goto clean_up_doorbell;
+ }
+
+ DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
+ "queue type=%d, doorbell=0x%llx\n",
+ gang->process->pasid, gang_id, qprops->queue_type,
+ qprops->doorbell_off);
+
+ queue->ring = qprops->ring;
+ queue->doorbell_off = qprops->doorbell_off;
+ queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
+ queue->queue_type = qprops->queue_type;
+ queue->paging = qprops->paging;
+ queue->gang = gang;
+ queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
+ list_add_tail(&queue->list, &gang->queue_list);
+
+ amdgpu_mes_unlock(&adev->mes);
+ return 0;
+
+clean_up_doorbell:
+ amdgpu_mes_kernel_doorbell_free(adev, gang->process,
+ qprops->doorbell_off);
+clean_up_queue_id:
+ spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
+ idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_mes_queue_free_mqd(queue);
+clean_up_memory:
+ kfree(queue);
+ return r;
+}
+
+int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
+{
+ unsigned long flags;
+ struct amdgpu_mes_queue *queue;
+ struct amdgpu_mes_gang *gang;
+ struct mes_remove_queue_input queue_input;
+ int r;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* remove the mes gang from idr list */
+ spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
+
+ queue = idr_find(&adev->mes.queue_id_idr, queue_id);
+ if (!queue) {
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+ amdgpu_mes_unlock(&adev->mes);
+ DRM_ERROR("queue id %d doesn't exist\n", queue_id);
+ return -EINVAL;
+ }
+
+ idr_remove(&adev->mes.queue_id_idr, queue_id);
+ spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+
+ DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
+ queue->doorbell_off);
+
+ gang = queue->gang;
+ queue_input.doorbell_offset = queue->doorbell_off;
+ queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
+
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
+ queue_id);
+
+ list_del(&queue->list);
+ amdgpu_mes_kernel_doorbell_free(adev, gang->process,
+ queue->doorbell_off);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_mes_queue_free_mqd(queue);
+ kfree(queue);
+ return 0;
+}
+
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to unmap legacy queue\n");
+
+ return r;
+}
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ struct mes_misc_op_input op_input;
+ int r, val = 0;
+
+ op_input.op = MES_MISC_OP_READ_REG;
+ op_input.read_reg.reg_offset = reg;
+ op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes rreg is not supported!\n");
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to read reg (0x%x)\n", reg);
+ else
+ val = *(adev->mes.read_val_ptr);
+
+error:
+ return val;
+}
+
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRITE_REG;
+ op_input.write_reg.reg_offset = reg;
+ op_input.write_reg.reg_value = val;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes wreg is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to write reg (0x%x)\n", reg);
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
+ op_input.wrm_reg.reg0 = reg0;
+ op_input.wrm_reg.reg1 = reg1;
+ op_input.wrm_reg.ref = ref;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to reg_write_reg_wait\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WAIT;
+ op_input.wrm_reg.reg0 = reg;
+ op_input.wrm_reg.ref = val;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes reg wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to reg_write_reg_wait\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes set shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.u32all = flags;
+ op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
+ memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+ sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 14)
+ op_input.set_shader_debugger.trap_en = trap_en;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
+static void
+amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_mes_queue_properties *props)
+{
+ props->queue_type = ring->funcs->type;
+ props->hqd_base_gpu_addr = ring->gpu_addr;
+ props->rptr_gpu_addr = ring->rptr_gpu_addr;
+ props->wptr_gpu_addr = ring->wptr_gpu_addr;
+ props->wptr_mc_addr =
+ ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
+ props->queue_size = ring->ring_size;
+ props->eop_gpu_addr = ring->eop_gpu_addr;
+ props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+ props->paging = false;
+ props->ring = ring;
+}
+
+#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \
+do { \
+ if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \
+ return offsetof(struct amdgpu_mes_ctx_meta_data, \
+ _eng[ring->idx].slots[id_offs]); \
+ else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \
+ return offsetof(struct amdgpu_mes_ctx_meta_data, \
+ _eng[ring->idx].ring); \
+ else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \
+ return offsetof(struct amdgpu_mes_ctx_meta_data, \
+ _eng[ring->idx].ib); \
+ else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \
+ return offsetof(struct amdgpu_mes_ctx_meta_data, \
+ _eng[ring->idx].padding); \
+} while(0)
+
+int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
+{
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
+ break;
+ default:
+ break;
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
+ int queue_type, int idx,
+ struct amdgpu_mes_ctx_data *ctx_data,
+ struct amdgpu_ring **out)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_mes_gang *gang;
+ struct amdgpu_mes_queue_properties qprops = {0};
+ int r, queue_id, pasid;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ amdgpu_mes_unlock(&adev->mes);
+ return -EINVAL;
+ }
+ pasid = gang->process->pasid;
+
+ ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
+ if (!ring) {
+ amdgpu_mes_unlock(&adev->mes);
+ return -ENOMEM;
+ }
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->is_mes_queue = true;
+ ring->mes_ctx = ctx_data;
+ ring->idx = idx;
+ ring->no_scheduler = true;
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ compute[ring->idx].mec_hpd);
+ ring->eop_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ }
+
+ switch (queue_type) {
+ case AMDGPU_RING_TYPE_GFX:
+ ring->funcs = adev->gfx.gfx_ring[0].funcs;
+ ring->me = adev->gfx.gfx_ring[0].me;
+ ring->pipe = adev->gfx.gfx_ring[0].pipe;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ ring->funcs = adev->gfx.compute_ring[0].funcs;
+ ring->me = adev->gfx.compute_ring[0].me;
+ ring->pipe = adev->gfx.compute_ring[0].pipe;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ring->funcs = adev->sdma.instance[0].ring.funcs;
+ break;
+ default:
+ BUG();
+ }
+
+ r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ goto clean_up_memory;
+
+ amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
+
+ dma_fence_wait(gang->process->vm->last_update, false);
+ dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
+ amdgpu_mes_unlock(&adev->mes);
+
+ r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
+ if (r)
+ goto clean_up_ring;
+
+ ring->hw_queue_id = queue_id;
+ ring->doorbell_index = qprops.doorbell_off;
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
+ queue_id);
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
+ queue_id);
+ else
+ BUG();
+
+ *out = ring;
+ return 0;
+
+clean_up_ring:
+ amdgpu_ring_fini(ring);
+clean_up_memory:
+ kfree(ring);
+ amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
+void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring)
+ return;
+
+ amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
+ amdgpu_ring_fini(ring);
+ kfree(ring);
+}
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio)
+{
+ return adev->mes.aggregated_doorbells[prio];
+}
+
+int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data)
+{
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev,
+ sizeof(struct amdgpu_mes_ctx_meta_data),
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
+ &ctx_data->meta_data_ptr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
+ return r;
+ }
+
+ if (!ctx_data->meta_data_obj)
+ return -ENOMEM;
+
+ memset(ctx_data->meta_data_ptr, 0,
+ sizeof(struct amdgpu_mes_ctx_meta_data));
+
+ return 0;
+}
+
+void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
+{
+ if (ctx_data->meta_data_obj)
+ amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
+ &ctx_data->meta_data_ptr);
+}
+
+int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_mes_ctx_data *ctx_data)
+{
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_sync sync;
+ struct drm_exec exec;
+ int r;
+
+ amdgpu_sync_create(&sync);
+
+ drm_exec_init(&exec, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec,
+ &ctx_data->meta_data_obj->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error_fini_exec;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error_fini_exec;
+ }
+
+ bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
+ if (!bo_va) {
+ DRM_ERROR("failed to create bo_va for meta data BO\n");
+ r = -ENOMEM;
+ goto error_fini_exec;
+ }
+
+ r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
+ sizeof(struct amdgpu_mes_ctx_meta_data),
+ AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+ AMDGPU_PTE_EXECUTABLE);
+
+ if (r) {
+ DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
+ goto error_del_bo_va;
+ }
+
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r) {
+ DRM_ERROR("failed to do vm_bo_update on meta data\n");
+ goto error_del_bo_va;
+ }
+ amdgpu_sync_fence(&sync, bo_va->last_pt_update);
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r) {
+ DRM_ERROR("failed to update pdes on meta data\n");
+ goto error_del_bo_va;
+ }
+ amdgpu_sync_fence(&sync, vm->last_update);
+
+ amdgpu_sync_wait(&sync, false);
+ drm_exec_fini(&exec);
+
+ amdgpu_sync_free(&sync);
+ ctx_data->meta_data_va = bo_va;
+ return 0;
+
+error_del_bo_va:
+ amdgpu_vm_bo_del(adev, bo_va);
+
+error_fini_exec:
+ drm_exec_fini(&exec);
+ amdgpu_sync_free(&sync);
+ return r;
+}
+
+int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data)
+{
+ struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
+ struct amdgpu_bo *bo = ctx_data->meta_data_obj;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ long r;
+
+ drm_exec_init(&exec, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec,
+ &ctx_data->meta_data_obj->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+ if (!amdgpu_vm_ready(vm))
+ goto out_unlock;
+
+ r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ &fence);
+ if (r)
+ goto out_unlock;
+ if (fence) {
+ amdgpu_bo_fence(bo, fence, true);
+ fence = NULL;
+ }
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (r || !fence)
+ goto out_unlock;
+
+ dma_fence_wait(fence, false);
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+out_unlock:
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
+ drm_exec_fini(&exec);
+
+ return r;
+}
+
+static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
+ int pasid, int *gang_id,
+ int queue_type, int num_queue,
+ struct amdgpu_ring **added_rings,
+ struct amdgpu_mes_ctx_data *ctx_data)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_mes_gang_properties gprops = {0};
+ int r, j;
+
+ /* create a gang for the process */
+ gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ gprops.gang_quantum = adev->mes.default_gang_quantum;
+ gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+
+ r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
+ if (r) {
+ DRM_ERROR("failed to add gang\n");
+ return r;
+ }
+
+ /* create queues for the gang */
+ for (j = 0; j < num_queue; j++) {
+ r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
+ ctx_data, &ring);
+ if (r) {
+ DRM_ERROR("failed to add ring\n");
+ break;
+ }
+
+ DRM_INFO("ring %s was added\n", ring->name);
+ added_rings[j] = ring;
+ }
+
+ return 0;
+}
+
+static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
+ ring = added_rings[i];
+ if (!ring)
+ continue;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ r = amdgpu_ring_test_ib(ring, 1000 * 10);
+ if (r) {
+ DRM_DEV_ERROR(ring->adev->dev,
+ "ring %s ib test failed (%d)\n",
+ ring->name, r);
+ return r;
+ } else
+ DRM_INFO("ring %s ib test pass\n", ring->name);
+ }
+
+ return 0;
+}
+
+int amdgpu_mes_self_test(struct amdgpu_device *adev)
+{
+ struct amdgpu_vm *vm = NULL;
+ struct amdgpu_mes_ctx_data ctx_data = {0};
+ struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
+ int gang_ids[3] = {0};
+ int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
+ { AMDGPU_RING_TYPE_COMPUTE, 1 },
+ { AMDGPU_RING_TYPE_SDMA, 1} };
+ int i, r, pasid, k = 0;
+
+ pasid = amdgpu_pasid_alloc(16);
+ if (pasid < 0) {
+ dev_warn(adev->dev, "No more PASIDs available!");
+ pasid = 0;
+ }
+
+ vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+ if (!vm) {
+ r = -ENOMEM;
+ goto error_pasid;
+ }
+
+ r = amdgpu_vm_init(adev, vm, -1);
+ if (r) {
+ DRM_ERROR("failed to initialize vm\n");
+ goto error_pasid;
+ }
+
+ r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
+ if (r) {
+ DRM_ERROR("failed to alloc ctx meta data\n");
+ goto error_fini;
+ }
+
+ ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
+ r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
+ if (r) {
+ DRM_ERROR("failed to map ctx meta data\n");
+ goto error_vm;
+ }
+
+ r = amdgpu_mes_create_process(adev, pasid, vm);
+ if (r) {
+ DRM_ERROR("failed to create MES process\n");
+ goto error_vm;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
+ /* On GFX v10.3, fw hasn't supported to map sdma queue. */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+ queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
+ continue;
+
+ r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
+ &gang_ids[i],
+ queue_types[i][0],
+ queue_types[i][1],
+ &added_rings[k],
+ &ctx_data);
+ if (r)
+ goto error_queues;
+
+ k += queue_types[i][1];
+ }
+
+ /* start ring test and ib test for MES queues */
+ amdgpu_mes_test_queues(added_rings);
+
+error_queues:
+ /* remove all queues */
+ for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
+ if (!added_rings[i])
+ continue;
+ amdgpu_mes_remove_ring(adev, added_rings[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
+ if (!gang_ids[i])
+ continue;
+ amdgpu_mes_remove_gang(adev, gang_ids[i]);
+ }
+
+ amdgpu_mes_destroy_process(adev, pasid);
+
+error_vm:
+ amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
+
+error_fini:
+ amdgpu_vm_fini(adev, vm);
+
+error_pasid:
+ if (pasid)
+ amdgpu_pasid_free(pasid);
+
+ amdgpu_mes_ctx_free_meta_data(&ctx_data);
+ kfree(vm);
+ return 0;
+}
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ struct amdgpu_firmware_info *info;
+ char ucode_prefix[30];
+ char fw_name[40];
+ bool need_retry = false;
+ int r;
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
+ need_retry = true;
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
+ if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
+ ucode_prefix);
+ DRM_INFO("try to fall back to %s\n", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+ fw_name);
+ }
+
+ if (r)
+ goto out;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+ adev->mes.uc_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+ adev->mes.data_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ int ucode, ucode_data;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ ucode = AMDGPU_UCODE_ID_CP_MES;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+ } else {
+ ucode = AMDGPU_UCODE_ID_CP_MES1;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ }
+
+ info = &adev->firmware.ucode[ucode];
+ info->ucode_id = ucode;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+ info = &adev->firmware.ucode[ucode_data];
+ info->ucode_id = ucode_data;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
new file mode 100644
index 000000000..a27b424ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_H__
+#define __AMDGPU_MES_H__
+
+#include "amdgpu_irq.h"
+#include "kgd_kfd_interface.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_doorbell.h"
+#include <linux/sched/mm.h>
+
+#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
+#define AMDGPU_MES_MAX_GFX_PIPES 2
+#define AMDGPU_MES_MAX_SDMA_PIPES 2
+
+#define AMDGPU_MES_API_VERSION_SHIFT 12
+#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
+
+#define AMDGPU_MES_VERSION_MASK 0x00000fff
+#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+
+enum amdgpu_mes_priority_level {
+ AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
+ AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2,
+ AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3,
+ AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
+ AMDGPU_MES_PRIORITY_NUM_LEVELS
+};
+
+#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+
+struct amdgpu_mes_funcs;
+
+enum admgpu_mes_pipe {
+ AMDGPU_MES_SCHED_PIPE = 0,
+ AMDGPU_MES_KIQ_PIPE,
+ AMDGPU_MAX_MES_PIPES = 2,
+};
+
+struct amdgpu_mes {
+ struct amdgpu_device *adev;
+
+ struct mutex mutex_hidden;
+
+ struct idr pasid_idr;
+ struct idr gang_id_idr;
+ struct idr queue_id_idr;
+ struct ida doorbell_ida;
+
+ spinlock_t queue_id_lock;
+
+ uint32_t sched_version;
+ uint32_t kiq_version;
+
+ uint32_t total_max_queue;
+ uint32_t max_doorbell_slices;
+
+ uint64_t default_process_quantum;
+ uint64_t default_gang_quantum;
+
+ struct amdgpu_ring ring;
+ spinlock_t ring_lock;
+
+ const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
+
+ /* mes ucode */
+ struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
+
+ /* mes ucode data */
+ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
+
+ /* eop gpu obj */
+ struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
+
+ void *mqd_backup[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t vmid_mask_gfxhub;
+ uint32_t vmid_mask_mmhub;
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
+ uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
+ uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t sch_ctx_offs;
+ uint64_t sch_ctx_gpu_addr;
+ uint64_t *sch_ctx_ptr;
+ uint32_t query_status_fence_offs;
+ uint64_t query_status_fence_gpu_addr;
+ uint64_t *query_status_fence_ptr;
+ uint32_t read_val_offs;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+
+ uint32_t saved_flags;
+
+ /* initialize kiq pipe */
+ int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
+
+ /* MES doorbells */
+ uint32_t db_start_dw_offset;
+ uint32_t num_mes_dbs;
+ unsigned long *doorbell_bitmap;
+
+ /* ip specific functions */
+ const struct amdgpu_mes_funcs *funcs;
+};
+
+struct amdgpu_mes_process {
+ int pasid;
+ struct amdgpu_vm *vm;
+ uint64_t pd_gpu_addr;
+ struct amdgpu_bo *proc_ctx_bo;
+ uint64_t proc_ctx_gpu_addr;
+ void *proc_ctx_cpu_ptr;
+ uint64_t process_quantum;
+ struct list_head gang_list;
+ uint32_t doorbell_index;
+ struct mutex doorbell_lock;
+};
+
+struct amdgpu_mes_gang {
+ int gang_id;
+ int priority;
+ int inprocess_gang_priority;
+ int global_priority_level;
+ struct list_head list;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_bo *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+ uint64_t gang_quantum;
+ struct list_head queue_list;
+};
+
+struct amdgpu_mes_queue {
+ struct list_head list;
+ struct amdgpu_mes_gang *gang;
+ int queue_id;
+ uint64_t doorbell_off;
+ struct amdgpu_bo *mqd_obj;
+ void *mqd_cpu_ptr;
+ uint64_t mqd_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ int queue_type;
+ int paging;
+ struct amdgpu_ring *ring;
+};
+
+struct amdgpu_mes_queue_properties {
+ int queue_type;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_size;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool paging;
+ struct amdgpu_ring *ring;
+ /* out */
+ uint64_t doorbell_off;
+};
+
+struct amdgpu_mes_gang_properties {
+ uint32_t priority;
+ uint32_t gang_quantum;
+ uint32_t inprocess_gang_priority;
+ uint32_t priority_level;
+ int global_priority_level;
+};
+
+struct mes_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_context_addr;
+ uint64_t gang_quantum;
+ uint64_t gang_context_addr;
+ uint32_t inprocess_gang_priority;
+ uint32_t gang_global_priority_level;
+ uint32_t doorbell_offset;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_type;
+ uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ uint32_t trap_en;
+ uint32_t skip_process_ctx_clear;
+ uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
+ uint32_t exclusively_scheduled;
+};
+
+struct mes_remove_queue_input {
+ uint32_t doorbell_offset;
+ uint64_t gang_context_addr;
+};
+
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
+struct mes_suspend_gang_input {
+ bool suspend_all_gangs;
+ uint64_t gang_context_addr;
+ uint64_t suspend_fence_addr;
+ uint32_t suspend_fence_value;
+};
+
+struct mes_resume_gang_input {
+ bool resume_all_gangs;
+ uint64_t gang_context_addr;
+};
+
+enum mes_misc_opcode {
+ MES_MISC_OP_WRITE_REG,
+ MES_MISC_OP_READ_REG,
+ MES_MISC_OP_WRM_REG_WAIT,
+ MES_MISC_OP_WRM_REG_WR_WAIT,
+ MES_MISC_OP_SET_SHADER_DEBUGGER,
+};
+
+struct mes_misc_op_input {
+ enum mes_misc_opcode op;
+
+ union {
+ struct {
+ uint32_t reg_offset;
+ uint64_t buffer_addr;
+ } read_reg;
+
+ struct {
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } write_reg;
+
+ struct {
+ uint32_t ref;
+ uint32_t mask;
+ uint32_t reg0;
+ uint32_t reg1;
+ } wrm_reg;
+
+ struct {
+ uint64_t process_context_addr;
+ union {
+ struct {
+ uint64_t single_memop : 1;
+ uint64_t single_alu_op : 1;
+ uint64_t reserved: 30;
+ };
+ uint32_t u32all;
+ } flags;
+ uint32_t spi_gdbg_per_vmid_cntl;
+ uint32_t tcp_watch_cntl[4];
+ uint32_t trap_en;
+ } set_shader_debugger;
+ };
+};
+
+struct amdgpu_mes_funcs {
+ int (*add_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input);
+
+ int (*remove_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input);
+
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
+ int (*suspend_gang)(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input);
+
+ int (*resume_gang)(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input);
+
+ int (*misc_op)(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input);
+};
+
+#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+
+int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
+int amdgpu_mes_init(struct amdgpu_device *adev);
+void amdgpu_mes_fini(struct amdgpu_device *adev);
+
+int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
+ struct amdgpu_vm *vm);
+void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
+
+int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
+ struct amdgpu_mes_gang_properties *gprops,
+ int *gang_id);
+int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev);
+int amdgpu_mes_resume(struct amdgpu_device *adev);
+
+int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
+ struct amdgpu_mes_queue_properties *qprops,
+ int *queue_id);
+int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val);
+int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t mask);
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en);
+
+int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
+ int queue_type, int idx,
+ struct amdgpu_mes_ctx_data *ctx_data,
+ struct amdgpu_ring **out);
+void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio);
+
+int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data);
+void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
+int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_mes_ctx_data *ctx_data);
+int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data);
+
+int amdgpu_mes_self_test(struct amdgpu_device *adev);
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why to set no-FS reclaim with MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_nofs_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems, we
+ * have no control over them and their lock dependencies.So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
+#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
new file mode 100644
index 000000000..912a5be2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_CTX_H__
+#define __AMDGPU_MES_CTX_H__
+
+#include "v10_structs.h"
+
+enum {
+ AMDGPU_MES_CTX_RPTR_OFFS = 0,
+ AMDGPU_MES_CTX_WPTR_OFFS,
+ AMDGPU_MES_CTX_FENCE_OFFS,
+ AMDGPU_MES_CTX_COND_EXE_OFFS,
+ AMDGPU_MES_CTX_TRAIL_FENCE_OFFS,
+ AMDGPU_MES_CTX_MAX_OFFS,
+};
+
+enum {
+ AMDGPU_MES_CTX_RING_OFFS = AMDGPU_MES_CTX_MAX_OFFS,
+ AMDGPU_MES_CTX_IB_OFFS,
+ AMDGPU_MES_CTX_PADDING_OFFS,
+};
+
+#define AMDGPU_MES_CTX_MAX_GFX_RINGS 1
+#define AMDGPU_MES_CTX_MAX_COMPUTE_RINGS 4
+#define AMDGPU_MES_CTX_MAX_SDMA_RINGS 2
+#define AMDGPU_MES_CTX_MAX_RINGS \
+ (AMDGPU_MES_CTX_MAX_GFX_RINGS + \
+ AMDGPU_MES_CTX_MAX_COMPUTE_RINGS + \
+ AMDGPU_MES_CTX_MAX_SDMA_RINGS)
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+#define GFX10_MEC_HPD_SIZE 2048
+
+struct amdgpu_wb_slot {
+ uint32_t data[8];
+};
+
+struct amdgpu_mes_ctx_meta_data {
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* gfx csa */
+ struct v10_gfx_meta_data gfx_meta_data;
+
+ uint8_t gds_backup[64 * 1024];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) gfx[AMDGPU_MES_CTX_MAX_GFX_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ uint8_t mec_hpd[GFX10_MEC_HPD_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) compute[AMDGPU_MES_CTX_MAX_COMPUTE_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* sdma csa for mcbp */
+ uint8_t sdma_meta_data[AMDGPU_CSA_SDMA_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) sdma[AMDGPU_MES_CTX_MAX_SDMA_RINGS];
+};
+
+struct amdgpu_mes_ctx_data {
+ struct amdgpu_bo *meta_data_obj;
+ uint64_t meta_data_gpu_addr;
+ uint64_t meta_data_mc_addr;
+ struct amdgpu_bo_va *meta_data_va;
+ void *meta_data_ptr;
+ uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
+};
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
new file mode 100644
index 000000000..0f6b1021f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mmhub_ras *ras;
+
+ if (!adev->mmhub.ras)
+ return 0;
+
+ ras = adev->mmhub.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mmhub ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mmhub");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if = &ras->ras_block.ras_comm;
+
+ /* mmhub ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
new file mode 100644
index 000000000..1ca9d4ed8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MMHUB_H__
+#define __AMDGPU_MMHUB_H__
+
+enum amdgpu_mmhub_ras_memory_id {
+ AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
+ AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
+ AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
+ AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
+ AMDGPU_MMHUB_WIO_CMDMEM = 4,
+ AMDGPU_MMHUB_RIO_CMDMEM = 5,
+ AMDGPU_MMHUB_WGMI_CMDMEM = 6,
+ AMDGPU_MMHUB_RGMI_CMDMEM = 7,
+ AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
+ AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
+ AMDGPU_MMHUB_MAM_DMEM0 = 10,
+ AMDGPU_MMHUB_MAM_DMEM1 = 11,
+ AMDGPU_MMHUB_MAM_DMEM2 = 12,
+ AMDGPU_MMHUB_MAM_DMEM3 = 13,
+ AMDGPU_MMHUB_WRET_TAGMEM = 19,
+ AMDGPU_MMHUB_RRET_TAGMEM = 20,
+ AMDGPU_MMHUB_WIO_DATAMEM = 21,
+ AMDGPU_MMHUB_WGMI_DATAMEM = 22,
+ AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
+ AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_mmhub_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_mmhub_funcs {
+ u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
+ void (*init)(struct amdgpu_device *adev);
+ int (*gart_enable)(struct amdgpu_device *adev);
+ void (*set_fault_enable_default)(struct amdgpu_device *adev,
+ bool value);
+ void (*gart_disable)(struct amdgpu_device *adev);
+ int (*set_clockgating)(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+ void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags);
+ void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
+ void (*update_power_gating)(struct amdgpu_device *adev,
+ bool enable);
+};
+
+struct amdgpu_mmhub {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_mmhub_funcs *funcs;
+ struct amdgpu_mmhub_ras *ras;
+};
+
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
new file mode 100644
index 000000000..32fe05c81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ * VA Linux Systems Inc., Fremont, California.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original Authors:
+ * Kevin E. Martin, Rickard E. Faith, Alan Hourihane
+ *
+ * Kernel port Author: Dave Airlie
+ */
+
+#ifndef AMDGPU_MODE_H
+#define AMDGPU_MODE_H
+
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_probe_helper.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/hrtimer.h>
+#include "amdgpu_irq.h"
+
+#include <drm/display/drm_dp_mst_helper.h>
+#include "modules/inc/mod_freesync.h"
+#include "amdgpu_dm_irq_params.h"
+
+struct amdgpu_bo;
+struct amdgpu_device;
+struct amdgpu_encoder;
+struct amdgpu_router;
+struct amdgpu_hpd;
+
+#define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
+#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
+#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
+#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
+
+#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base)
+
+#define AMDGPU_MAX_HPD_PINS 6
+#define AMDGPU_MAX_CRTCS 6
+#define AMDGPU_MAX_PLANES 6
+#define AMDGPU_MAX_AFMT_BLOCKS 9
+
+enum amdgpu_rmx_type {
+ RMX_OFF,
+ RMX_FULL,
+ RMX_CENTER,
+ RMX_ASPECT
+};
+
+enum amdgpu_underscan_type {
+ UNDERSCAN_OFF,
+ UNDERSCAN_ON,
+ UNDERSCAN_AUTO,
+};
+
+#define AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS 50
+#define AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS 10
+
+enum amdgpu_hpd_id {
+ AMDGPU_HPD_1 = 0,
+ AMDGPU_HPD_2,
+ AMDGPU_HPD_3,
+ AMDGPU_HPD_4,
+ AMDGPU_HPD_5,
+ AMDGPU_HPD_6,
+ AMDGPU_HPD_NONE = 0xff,
+};
+
+enum amdgpu_crtc_irq {
+ AMDGPU_CRTC_IRQ_VBLANK1 = 0,
+ AMDGPU_CRTC_IRQ_VBLANK2,
+ AMDGPU_CRTC_IRQ_VBLANK3,
+ AMDGPU_CRTC_IRQ_VBLANK4,
+ AMDGPU_CRTC_IRQ_VBLANK5,
+ AMDGPU_CRTC_IRQ_VBLANK6,
+ AMDGPU_CRTC_IRQ_VLINE1,
+ AMDGPU_CRTC_IRQ_VLINE2,
+ AMDGPU_CRTC_IRQ_VLINE3,
+ AMDGPU_CRTC_IRQ_VLINE4,
+ AMDGPU_CRTC_IRQ_VLINE5,
+ AMDGPU_CRTC_IRQ_VLINE6,
+ AMDGPU_CRTC_IRQ_NONE = 0xff
+};
+
+enum amdgpu_pageflip_irq {
+ AMDGPU_PAGEFLIP_IRQ_D1 = 0,
+ AMDGPU_PAGEFLIP_IRQ_D2,
+ AMDGPU_PAGEFLIP_IRQ_D3,
+ AMDGPU_PAGEFLIP_IRQ_D4,
+ AMDGPU_PAGEFLIP_IRQ_D5,
+ AMDGPU_PAGEFLIP_IRQ_D6,
+ AMDGPU_PAGEFLIP_IRQ_NONE = 0xff
+};
+
+enum amdgpu_flip_status {
+ AMDGPU_FLIP_NONE,
+ AMDGPU_FLIP_PENDING,
+ AMDGPU_FLIP_SUBMITTED
+};
+
+#define AMDGPU_MAX_I2C_BUS 16
+
+/* amdgpu gpio-based i2c
+ * 1. "mask" reg and bits
+ * grabs the gpio pins for software use
+ * 0=not held 1=held
+ * 2. "a" reg and bits
+ * output pin value
+ * 0=low 1=high
+ * 3. "en" reg and bits
+ * sets the pin direction
+ * 0=input 1=output
+ * 4. "y" reg and bits
+ * input pin value
+ * 0=low 1=high
+ */
+struct amdgpu_i2c_bus_rec {
+ bool valid;
+ /* id used by atom */
+ uint8_t i2c_id;
+ /* id used by atom */
+ enum amdgpu_hpd_id hpd;
+ /* can be used with hw i2c engine */
+ bool hw_capable;
+ /* uses multi-media i2c engine */
+ bool mm_i2c;
+ /* regs and bits */
+ uint32_t mask_clk_reg;
+ uint32_t mask_data_reg;
+ uint32_t a_clk_reg;
+ uint32_t a_data_reg;
+ uint32_t en_clk_reg;
+ uint32_t en_data_reg;
+ uint32_t y_clk_reg;
+ uint32_t y_data_reg;
+ uint32_t mask_clk_mask;
+ uint32_t mask_data_mask;
+ uint32_t a_clk_mask;
+ uint32_t a_data_mask;
+ uint32_t en_clk_mask;
+ uint32_t en_data_mask;
+ uint32_t y_clk_mask;
+ uint32_t y_data_mask;
+};
+
+#define AMDGPU_MAX_BIOS_CONNECTOR 16
+
+/* pll flags */
+#define AMDGPU_PLL_USE_BIOS_DIVS (1 << 0)
+#define AMDGPU_PLL_NO_ODD_POST_DIV (1 << 1)
+#define AMDGPU_PLL_USE_REF_DIV (1 << 2)
+#define AMDGPU_PLL_LEGACY (1 << 3)
+#define AMDGPU_PLL_PREFER_LOW_REF_DIV (1 << 4)
+#define AMDGPU_PLL_PREFER_HIGH_REF_DIV (1 << 5)
+#define AMDGPU_PLL_PREFER_LOW_FB_DIV (1 << 6)
+#define AMDGPU_PLL_PREFER_HIGH_FB_DIV (1 << 7)
+#define AMDGPU_PLL_PREFER_LOW_POST_DIV (1 << 8)
+#define AMDGPU_PLL_PREFER_HIGH_POST_DIV (1 << 9)
+#define AMDGPU_PLL_USE_FRAC_FB_DIV (1 << 10)
+#define AMDGPU_PLL_PREFER_CLOSEST_LOWER (1 << 11)
+#define AMDGPU_PLL_USE_POST_DIV (1 << 12)
+#define AMDGPU_PLL_IS_LCD (1 << 13)
+#define AMDGPU_PLL_PREFER_MINM_OVER_MAXP (1 << 14)
+
+struct amdgpu_pll {
+ /* reference frequency */
+ uint32_t reference_freq;
+
+ /* fixed dividers */
+ uint32_t reference_div;
+ uint32_t post_div;
+
+ /* pll in/out limits */
+ uint32_t pll_in_min;
+ uint32_t pll_in_max;
+ uint32_t pll_out_min;
+ uint32_t pll_out_max;
+ uint32_t lcd_pll_out_min;
+ uint32_t lcd_pll_out_max;
+ uint32_t best_vco;
+
+ /* divider limits */
+ uint32_t min_ref_div;
+ uint32_t max_ref_div;
+ uint32_t min_post_div;
+ uint32_t max_post_div;
+ uint32_t min_feedback_div;
+ uint32_t max_feedback_div;
+ uint32_t min_frac_feedback_div;
+ uint32_t max_frac_feedback_div;
+
+ /* flags for the current clock */
+ uint32_t flags;
+
+ /* pll id */
+ uint32_t id;
+};
+
+struct amdgpu_i2c_chan {
+ struct i2c_adapter adapter;
+ struct drm_device *dev;
+ struct i2c_algo_bit_data bit;
+ struct amdgpu_i2c_bus_rec rec;
+ struct drm_dp_aux aux;
+ bool has_aux;
+ struct mutex mutex;
+};
+
+struct amdgpu_afmt {
+ bool enabled;
+ int offset;
+ bool last_buffer_filled_status;
+ int id;
+ struct amdgpu_audio_pin *pin;
+};
+
+/*
+ * Audio
+ */
+struct amdgpu_audio_pin {
+ int channels;
+ int rate;
+ int bits_per_sample;
+ u8 status_bits;
+ u8 category_code;
+ u32 offset;
+ bool connected;
+ u32 id;
+};
+
+struct amdgpu_audio {
+ bool enabled;
+ struct amdgpu_audio_pin pin[AMDGPU_MAX_AFMT_BLOCKS];
+ int num_pins;
+};
+
+struct amdgpu_display_funcs {
+ /* display watermarks */
+ void (*bandwidth_update)(struct amdgpu_device *adev);
+ /* get frame count */
+ u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
+ /* set backlight level */
+ void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level);
+ /* get backlight level */
+ u8 (*backlight_get_level)(struct amdgpu_encoder *amdgpu_encoder);
+ /* hotplug detect */
+ bool (*hpd_sense)(struct amdgpu_device *adev, enum amdgpu_hpd_id hpd);
+ void (*hpd_set_polarity)(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd);
+ u32 (*hpd_get_gpio_reg)(struct amdgpu_device *adev);
+ /* pageflipping */
+ void (*page_flip)(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async);
+ int (*page_flip_get_scanoutpos)(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position);
+ /* display topology setup */
+ void (*add_encoder)(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps);
+ void (*add_connector)(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router);
+
+
+};
+
+struct amdgpu_framebuffer {
+ struct drm_framebuffer base;
+
+ uint64_t tiling_flags;
+ bool tmz_surface;
+
+ /* caching for later use */
+ uint64_t address;
+};
+
+struct amdgpu_mode_info {
+ struct atom_context *atom_context;
+ struct card_info *atom_card_info;
+ bool mode_config_initialized;
+ struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
+ struct drm_plane *planes[AMDGPU_MAX_PLANES];
+ struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
+ /* DVI-I properties */
+ struct drm_property *coherent_mode_property;
+ /* DAC enable load detect */
+ struct drm_property *load_detect_property;
+ /* underscan */
+ struct drm_property *underscan_property;
+ struct drm_property *underscan_hborder_property;
+ struct drm_property *underscan_vborder_property;
+ /* audio */
+ struct drm_property *audio_property;
+ /* FMT dithering */
+ struct drm_property *dither_property;
+ /* Adaptive Backlight Modulation (power feature) */
+ struct drm_property *abm_level_property;
+ /* hardcoded DFP edid from BIOS */
+ struct edid *bios_hardcoded_edid;
+ int bios_hardcoded_edid_size;
+
+ /* firmware flags */
+ u32 firmware_flags;
+ /* pointer to backlight encoder */
+ struct amdgpu_encoder *bl_encoder;
+ u8 bl_level; /* saved backlight level */
+ struct amdgpu_audio audio; /* audio stuff */
+ int num_crtc; /* number of crtcs */
+ int num_hpd; /* number of hpd pins */
+ int num_dig; /* number of dig blocks */
+ bool gpu_vm_support; /* supports display from GTT */
+ int disp_priority;
+ const struct amdgpu_display_funcs *funcs;
+ const enum drm_plane_type *plane_type;
+};
+
+#define AMDGPU_MAX_BL_LEVEL 0xFF
+
+struct amdgpu_backlight_privdata {
+ struct amdgpu_encoder *encoder;
+ uint8_t negative;
+};
+
+struct amdgpu_atom_ss {
+ uint16_t percentage;
+ uint16_t percentage_divider;
+ uint8_t type;
+ uint16_t step;
+ uint8_t delay;
+ uint8_t range;
+ uint8_t refdiv;
+ /* asic_ss */
+ uint16_t rate;
+ uint16_t amount;
+};
+
+struct amdgpu_crtc {
+ struct drm_crtc base;
+ int crtc_id;
+ bool enabled;
+ bool can_tile;
+ uint32_t crtc_offset;
+ struct drm_gem_object *cursor_bo;
+ uint64_t cursor_addr;
+ int cursor_x;
+ int cursor_y;
+ int cursor_hot_x;
+ int cursor_hot_y;
+ int cursor_width;
+ int cursor_height;
+ int max_cursor_width;
+ int max_cursor_height;
+ enum amdgpu_rmx_type rmx_type;
+ u8 h_border;
+ u8 v_border;
+ fixed20_12 vsc;
+ fixed20_12 hsc;
+ struct drm_display_mode native_mode;
+ u32 pll_id;
+ /* page flipping */
+ struct amdgpu_flip_work *pflip_works;
+ enum amdgpu_flip_status pflip_status;
+ int deferred_flip_completion;
+ /* parameters access from DM IRQ handler */
+ struct dm_irq_params dm_irq_params;
+ /* pll sharing */
+ struct amdgpu_atom_ss ss;
+ bool ss_enabled;
+ u32 adjusted_clock;
+ int bpc;
+ u32 pll_reference_div;
+ u32 pll_post_div;
+ u32 pll_flags;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ /* for dpm */
+ u32 line_time;
+ u32 wm_low;
+ u32 wm_high;
+ u32 lb_vblank_lead_lines;
+ struct drm_display_mode hw_mode;
+ /* for virtual dce */
+ struct hrtimer vblank_timer;
+ enum amdgpu_interrupt_state vsync_timer_enabled;
+
+ int otg_inst;
+ struct drm_pending_vblank_event *event;
+};
+
+struct amdgpu_encoder_atom_dig {
+ bool linkb;
+ /* atom dig */
+ bool coherent_mode;
+ int dig_encoder; /* -1 disabled, 0 DIGA, 1 DIGB, etc. */
+ /* atom lvds/edp */
+ uint32_t lcd_misc;
+ uint16_t panel_pwr_delay;
+ uint32_t lcd_ss_id;
+ /* panel mode */
+ struct drm_display_mode native_mode;
+ struct backlight_device *bl_dev;
+ int dpms_mode;
+ uint8_t backlight_level;
+ int panel_mode;
+ struct amdgpu_afmt *afmt;
+};
+
+struct amdgpu_encoder {
+ struct drm_encoder base;
+ uint32_t encoder_enum;
+ uint32_t encoder_id;
+ uint32_t devices;
+ uint32_t active_device;
+ uint32_t flags;
+ uint32_t pixel_clock;
+ enum amdgpu_rmx_type rmx_type;
+ enum amdgpu_underscan_type underscan_type;
+ uint32_t underscan_hborder;
+ uint32_t underscan_vborder;
+ struct drm_display_mode native_mode;
+ void *enc_priv;
+ int audio_polling_active;
+ bool is_ext_encoder;
+ u16 caps;
+};
+
+struct amdgpu_connector_atom_dig {
+ /* displayport */
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
+ u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
+ u8 dp_sink_type;
+ int dp_clock;
+ int dp_lane_count;
+ bool edp_on;
+};
+
+struct amdgpu_gpio_rec {
+ bool valid;
+ u8 id;
+ u32 reg;
+ u32 mask;
+ u32 shift;
+};
+
+struct amdgpu_hpd {
+ enum amdgpu_hpd_id hpd;
+ u8 plugged_state;
+ struct amdgpu_gpio_rec gpio;
+};
+
+struct amdgpu_router {
+ u32 router_id;
+ struct amdgpu_i2c_bus_rec i2c_info;
+ u8 i2c_addr;
+ /* i2c mux */
+ bool ddc_valid;
+ u8 ddc_mux_type;
+ u8 ddc_mux_control_pin;
+ u8 ddc_mux_state;
+ /* clock/data mux */
+ bool cd_valid;
+ u8 cd_mux_type;
+ u8 cd_mux_control_pin;
+ u8 cd_mux_state;
+};
+
+enum amdgpu_connector_audio {
+ AMDGPU_AUDIO_DISABLE = 0,
+ AMDGPU_AUDIO_ENABLE = 1,
+ AMDGPU_AUDIO_AUTO = 2
+};
+
+enum amdgpu_connector_dither {
+ AMDGPU_FMT_DITHER_DISABLE = 0,
+ AMDGPU_FMT_DITHER_ENABLE = 1,
+};
+
+struct amdgpu_dm_dp_aux {
+ struct drm_dp_aux aux;
+ struct ddc_service *ddc_service;
+};
+
+struct amdgpu_i2c_adapter {
+ struct i2c_adapter base;
+
+ struct ddc_service *ddc_service;
+};
+
+#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
+
+struct amdgpu_connector {
+ struct drm_connector base;
+ uint32_t connector_id;
+ uint32_t devices;
+ struct amdgpu_i2c_chan *ddc_bus;
+ /* some systems have an hdmi and vga port with a shared ddc line */
+ bool shared_ddc;
+ bool use_digital;
+ /* we need to mind the EDID between detect
+ and get modes due to analog/digital/tvencoder */
+ struct edid *edid;
+ void *con_priv;
+ bool dac_load_detect;
+ bool detected_by_load; /* if the connection status was determined by load */
+ bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
+ uint16_t connector_object_id;
+ struct amdgpu_hpd hpd;
+ struct amdgpu_router router;
+ struct amdgpu_i2c_chan *router_bus;
+ enum amdgpu_connector_audio audio;
+ enum amdgpu_connector_dither dither;
+ unsigned pixelclock_for_modeset;
+};
+
+/* TODO: start to use this struct and remove same field from base one */
+struct amdgpu_mst_connector {
+ struct amdgpu_connector base;
+
+ struct drm_dp_mst_topology_mgr mst_mgr;
+ struct amdgpu_dm_dp_aux dm_dp_aux;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_connector *mst_root;
+ bool is_mst_connector;
+ struct amdgpu_encoder *mst_encoder;
+};
+
+#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
+ ((em) == ATOM_ENCODER_MODE_DP_MST))
+
+/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
+#define DRM_SCANOUTPOS_VALID (1 << 0)
+#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
+#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
+#define USE_REAL_VBLANKSTART (1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
+
+void amdgpu_link_encoder_connector(struct drm_device *dev);
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder(struct drm_encoder *encoder);
+struct drm_connector *
+amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder);
+bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
+ u32 pixel_clock);
+
+u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
+struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
+
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux);
+
+void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
+
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
+
+int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+
+void amdgpu_enc_destroy(struct drm_encoder *encoder);
+void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *adjusted_mode);
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
+
+bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
+ bool in_vblank_irq, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
+
+/* amdgpu_display.c */
+void amdgpu_display_print_display_setup(struct drm_device *dev);
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx);
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx);
+extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
new file mode 100644
index 000000000..51ca544a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_nbio_ras *ras;
+
+ if (!adev->nbio.ras)
+ return 0;
+
+ ras = adev->nbio.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "pcie_bif");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count)
+ return adev->nbio.funcs->get_pcie_replay_count(adev);
+
+ return 0;
+}
+
+void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ if (adev->nbio.funcs->get_pcie_usage)
+ adev->nbio.funcs->get_pcie_usage(adev, count0, count1);
+
+}
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
+ if (r)
+ goto late_fini;
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
new file mode 100644
index 000000000..6cf7a8829
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_NBIO_H__
+#define __AMDGPU_NBIO_H__
+
+/*
+ * amdgpu nbio functions
+ */
+struct nbio_hdp_flush_reg {
+ u32 ref_and_mask_cp0;
+ u32 ref_and_mask_cp1;
+ u32 ref_and_mask_cp2;
+ u32 ref_and_mask_cp3;
+ u32 ref_and_mask_cp4;
+ u32 ref_and_mask_cp5;
+ u32 ref_and_mask_cp6;
+ u32 ref_and_mask_cp7;
+ u32 ref_and_mask_cp8;
+ u32 ref_and_mask_cp9;
+ u32 ref_and_mask_sdma0;
+ u32 ref_and_mask_sdma1;
+ u32 ref_and_mask_sdma2;
+ u32 ref_and_mask_sdma3;
+ u32 ref_and_mask_sdma4;
+ u32 ref_and_mask_sdma5;
+ u32 ref_and_mask_sdma6;
+ u32 ref_and_mask_sdma7;
+};
+
+struct amdgpu_nbio_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+ void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+ int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+ int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio_funcs {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+ u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_rev_id)(struct amdgpu_device *adev);
+ void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+ u32 (*get_memsize)(struct amdgpu_device *adev);
+ void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance);
+ void (*gc_doorbell_init)(struct amdgpu_device *adev);
+ void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*ih_doorbell_range)(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index);
+ void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u64 *flags);
+ void (*ih_control)(struct amdgpu_device *adev);
+ void (*init_registers)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
+ void (*enable_aspm)(struct amdgpu_device *adev,
+ bool enable);
+ void (*program_aspm)(struct amdgpu_device *adev);
+ void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
+ void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
+ void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
+ u32 (*get_rom_offset)(struct amdgpu_device *adev);
+ int (*get_compute_partition_mode)(struct amdgpu_device *adev);
+ u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+ u32 *supp_modes);
+ u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1);
+};
+
+struct amdgpu_nbio {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ struct amdgpu_irq_src ras_controller_irq;
+ struct amdgpu_irq_src ras_err_event_athub_irq;
+ struct ras_common_if *ras_if;
+ const struct amdgpu_nbio_funcs *funcs;
+ struct amdgpu_nbio_ras *ras;
+};
+
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1);
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
new file mode 100644
index 000000000..ace837cfa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -0,0 +1,1642 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ * Dave Airlie
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+
+#include <drm/drm_drv.h>
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+
+/**
+ * DOC: amdgpu_object
+ *
+ * This defines the interfaces to operate on an &amdgpu_bo buffer object which
+ * represents memory used by driver (VRAM, system memory, etc.). The driver
+ * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces
+ * to create/destroy/set buffer object which are then managed by the kernel TTM
+ * memory manager.
+ * The interfaces are also used internally by kernel clients, including gfx,
+ * uvd, etc. for kernel managed allocations used by the GPU.
+ *
+ */
+
+static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+
+ amdgpu_bo_kunmap(bo);
+
+ if (bo->tbo.base.import_attach)
+ drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
+ drm_gem_object_release(&bo->tbo.base);
+ amdgpu_bo_unref(&bo->parent);
+ kvfree(bo);
+}
+
+static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ struct amdgpu_bo_user *ubo;
+
+ ubo = to_amdgpu_bo_user(bo);
+ kfree(ubo->metadata);
+ amdgpu_bo_destroy(tbo);
+}
+
+static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
+ struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
+ struct amdgpu_bo_vm *vmbo;
+
+ bo = shadow_bo->parent;
+ vmbo = to_amdgpu_bo_vm(bo);
+ /* in case amdgpu_device_recover_vram got NULL of bo->parent */
+ if (!list_empty(&vmbo->shadow_list)) {
+ mutex_lock(&adev->shadow_list_lock);
+ list_del_init(&vmbo->shadow_list);
+ mutex_unlock(&adev->shadow_list_lock);
+ }
+
+ amdgpu_bo_destroy(tbo);
+}
+
+/**
+ * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
+ * @bo: buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * an &amdgpu_bo.
+ *
+ * Returns:
+ * true if the object belongs to &amdgpu_bo, false if not.
+ */
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
+{
+ if (bo->destroy == &amdgpu_bo_destroy ||
+ bo->destroy == &amdgpu_bo_user_destroy ||
+ bo->destroy == &amdgpu_bo_vm_destroy)
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_bo_placement_from_domain - set buffer's placement
+ * @abo: &amdgpu_bo buffer object whose placement is to be set
+ * @domain: requested domain
+ *
+ * Sets buffer's placement according to requested domain and the buffer's
+ * flags.
+ */
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct ttm_placement *placement = &abo->placement;
+ struct ttm_place *places = abo->placements;
+ u64 flags = abo->flags;
+ u32 c = 0;
+
+ if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+
+ if (adev->gmc.mem_partitions && mem_id >= 0) {
+ places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
+ /*
+ * memory partition range lpfn is inclusive start + size - 1
+ * TTM place lpfn is exclusive start + size
+ */
+ places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
+ } else {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ }
+ places[c].mem_type = TTM_PL_VRAM;
+ places[c].flags = 0;
+
+ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
+ else
+ places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+ if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type =
+ abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
+ AMDGPU_PL_PREEMPT : TTM_PL_TT;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_CPU) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = TTM_PL_SYSTEM;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GDS) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_GDS;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GWS) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_GWS;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_OA) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_OA;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (!c) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = TTM_PL_SYSTEM;
+ places[c].flags = 0;
+ c++;
+ }
+
+ BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS);
+
+ placement->num_placement = c;
+ placement->placement = places;
+
+ placement->num_busy_placement = c;
+ placement->busy_placement = places;
+}
+
+/**
+ * amdgpu_bo_create_reserved - create reserved BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use, and returns it still
+ * reserved.
+ *
+ * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
+{
+ struct amdgpu_bo_param bp;
+ bool free = false;
+ int r;
+
+ if (!size) {
+ amdgpu_bo_unref(bo_ptr);
+ return 0;
+ }
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = domain;
+ bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ if (!*bo_ptr) {
+ r = amdgpu_bo_create(adev, &bp, bo_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
+ r);
+ return r;
+ }
+ free = true;
+ }
+
+ r = amdgpu_bo_reserve(*bo_ptr, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
+ goto error_free;
+ }
+
+ r = amdgpu_bo_pin(*bo_ptr, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo_ptr);
+ goto error_unpin;
+ }
+
+ if (gpu_addr)
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
+ goto error_unpin;
+ }
+ }
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo_ptr);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo_ptr);
+
+error_free:
+ if (free)
+ amdgpu_bo_unref(bo_ptr);
+
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_kernel - create BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use.
+ *
+ * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
+{
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
+ gpu_addr, cpu_addr);
+
+ if (r)
+ return r;
+
+ if (*bo_ptr)
+ amdgpu_bo_unreserve(*bo_ptr);
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @bo_ptr: used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in VRAM.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int i;
+ int r;
+
+ offset &= PAGE_MASK;
+ size = ALIGN(size, PAGE_SIZE);
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
+ if (r)
+ return r;
+
+ if ((*bo_ptr) == NULL)
+ return 0;
+
+ /*
+ * Remove the original mem node and create a new one at the request
+ * position.
+ */
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo_ptr);
+
+ ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
+
+ for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+ (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ }
+ r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+ &(*bo_ptr)->tbo.resource, &ctx);
+ if (r)
+ goto error;
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r)
+ goto error;
+ }
+
+ amdgpu_bo_unreserve(*bo_ptr);
+ return 0;
+
+error:
+ amdgpu_bo_unreserve(*bo_ptr);
+ amdgpu_bo_unref(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_free_kernel - free BO for kernel use
+ *
+ * @bo: amdgpu BO to free
+ * @gpu_addr: pointer to where the BO's GPU memory space address was stored
+ * @cpu_addr: pointer to where the BO's CPU memory space address was stored
+ *
+ * unmaps and unpin a BO for kernel internal use.
+ */
+void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
+ void **cpu_addr)
+{
+ if (*bo == NULL)
+ return;
+
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
+ if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo);
+
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ }
+ amdgpu_bo_unref(bo);
+
+ if (gpu_addr)
+ *gpu_addr = 0;
+
+ if (cpu_addr)
+ *cpu_addr = NULL;
+}
+
+/* Validate bo size is bit bigger then the request domain */
+static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
+ unsigned long size, u32 domain)
+{
+ struct ttm_resource_manager *man = NULL;
+
+ /*
+ * If GTT is part of requested domains the check must succeed to
+ * allow fall back to GTT.
+ */
+ if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+
+ if (man && size < man->size)
+ return true;
+ else if (!man)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ goto fail;
+ } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+
+ if (man && size < man->size)
+ return true;
+ goto fail;
+ }
+
+ /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
+ return true;
+
+fail:
+ if (man)
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
+ man->size);
+ return false;
+}
+
+bool amdgpu_bo_support_uswc(u64 bo_flags)
+{
+
+#ifdef CONFIG_X86_32
+ /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
+ */
+ return false;
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+ /* Don't try to enable write-combining when it can't work, or things
+ * may be slow
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+ */
+
+#ifndef CONFIG_COMPILE_TEST
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+ thanks to write-combining
+#endif
+
+ if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+ "better performance thanks to write-combining\n");
+ return false;
+#else
+ /* For architectures that don't support WC memory,
+ * mask out the WC flag from the BO
+ */
+ if (!drm_arch_can_wc_memory())
+ return false;
+
+ return true;
+#endif
+}
+
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo **bo_ptr)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = (bp->type != ttm_bo_type_kernel),
+ .no_wait_gpu = bp->no_wait_gpu,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = bp->type != ttm_bo_type_kernel,
+ .resv = bp->resv
+ };
+ struct amdgpu_bo *bo;
+ unsigned long page_align, size = bp->size;
+ int r;
+
+ /* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+ /* GWS and OA don't need any alignment. */
+ page_align = bp->byte_align;
+ size <<= PAGE_SHIFT;
+
+ } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
+ /* Both size and alignment must be a multiple of 4. */
+ page_align = ALIGN(bp->byte_align, 4);
+ size = ALIGN(size, 4) << PAGE_SHIFT;
+ } else {
+ /* Memory should be aligned at least to a page size. */
+ page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+ size = ALIGN(size, PAGE_SIZE);
+ }
+
+ if (!amdgpu_bo_validate_size(adev, size, bp->domain))
+ return -ENOMEM;
+
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
+
+ *bo_ptr = NULL;
+ bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
+ if (bo == NULL)
+ return -ENOMEM;
+ drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
+ bo->vm_bo = NULL;
+ bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+ bp->domain;
+ bo->allowed_domains = bo->preferred_domains;
+ if (bp->type != ttm_bo_type_kernel &&
+ !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
+ bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
+ bo->flags = bp->flags;
+
+ if (adev->gmc.mem_partitions)
+ /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
+ bo->xcp_id = bp->xcp_id_plus1 - 1;
+ else
+ /* For GPUs without spatial partitioning */
+ bo->xcp_id = 0;
+
+ if (!amdgpu_bo_support_uswc(bo->flags))
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (adev->ras_enabled)
+ bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+
+ bo->tbo.bdev = &adev->mman.bdev;
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
+ AMDGPU_GEM_DOMAIN_GDS))
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ else
+ amdgpu_bo_placement_from_domain(bo, bp->domain);
+ if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 1;
+
+ if (!bp->destroy)
+ bp->destroy = &amdgpu_bo_destroy;
+
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, bp->type,
+ &bo->placement, page_align, &ctx, NULL,
+ bp->resv, bp->destroy);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM &&
+ amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
+ ctx.bytes_moved);
+ else
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
+
+ if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ struct dma_fence *fence;
+
+ r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+ if (unlikely(r))
+ goto fail_unreserve;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+ }
+ if (!bp->resv)
+ amdgpu_bo_unreserve(bo);
+ *bo_ptr = bo;
+
+ trace_amdgpu_bo_create(bo);
+
+ /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
+ if (bp->type == ttm_bo_type_device)
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ return 0;
+
+fail_unreserve:
+ if (!bp->resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+ amdgpu_bo_unref(&bo);
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @ubo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by user application;
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr)
+{
+ struct amdgpu_bo *bo_ptr;
+ int r;
+
+ bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
+ bp->destroy = &amdgpu_bo_user_destroy;
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+
+ *ubo_ptr = to_amdgpu_bo_user(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_vm - create an &amdgpu_bo_vm buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @vmbo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be for GPUVM.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **vmbo_ptr)
+{
+ struct amdgpu_bo *bo_ptr;
+ int r;
+
+ /* bo_ptr_size will be determined by the caller and it depends on
+ * num of amdgpu_vm_pt entries.
+ */
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+
+ *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
+ *
+ * @vmbo: BO that will be inserted into the shadow list
+ *
+ * Insert a BO to the shadow list.
+ */
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
+
+ mutex_lock(&adev->shadow_list_lock);
+ list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
+ vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
+ vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
+ mutex_unlock(&adev->shadow_list_lock);
+}
+
+/**
+ * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
+ *
+ * @shadow: &amdgpu_bo shadow to be restored
+ * @fence: dma_fence associated with the operation
+ *
+ * Copies a buffer object's shadow content back to the object.
+ * This is used for recovering a buffer from its shadow in case of a gpu
+ * reset where vram context may be lost.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
+
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ uint64_t shadow_addr, parent_addr;
+
+ shadow_addr = amdgpu_bo_gpu_offset(shadow);
+ parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
+
+ return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
+ amdgpu_bo_size(shadow), NULL, fence,
+ true, false, false);
+}
+
+/**
+ * amdgpu_bo_kmap - map an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be mapped
+ * @ptr: kernel virtual address to be returned
+ *
+ * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
+ * amdgpu_bo_kptr() to get the kernel virtual address.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
+{
+ void *kptr;
+ long r;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ return -EPERM;
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
+ kptr = amdgpu_bo_kptr(bo);
+ if (kptr) {
+ if (ptr)
+ *ptr = kptr;
+ return 0;
+ }
+
+ r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
+ if (r)
+ return r;
+
+ if (ptr)
+ *ptr = amdgpu_bo_kptr(bo);
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Calls ttm_kmap_obj_virtual() to get the kernel virtual address
+ *
+ * Returns:
+ * the virtual address of a buffer object area.
+ */
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
+{
+ bool is_iomem;
+
+ return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+}
+
+/**
+ * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unmapped
+ *
+ * Unmaps a kernel map set up by amdgpu_bo_kmap().
+ */
+void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
+{
+ if (bo->kmap.bo)
+ ttm_bo_kunmap(&bo->kmap);
+}
+
+/**
+ * amdgpu_bo_ref - reference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * References the contained &ttm_buffer_object.
+ *
+ * Returns:
+ * a refcounted pointer to the &amdgpu_bo buffer object.
+ */
+struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return NULL;
+
+ ttm_bo_get(&bo->tbo);
+ return bo;
+}
+
+/**
+ * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Unreferences the contained &ttm_buffer_object and clear the pointer
+ */
+void amdgpu_bo_unref(struct amdgpu_bo **bo)
+{
+ struct ttm_buffer_object *tbo;
+
+ if ((*bo) == NULL)
+ return;
+
+ tbo = &((*bo)->tbo);
+ ttm_bo_put(tbo);
+ *bo = NULL;
+}
+
+/**
+ * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ * @min_offset: the start of requested address range
+ * @max_offset: the end of requested address range
+ *
+ * Pins the buffer object according to requested domain and address range. If
+ * the memory is unbound gart memory, binds the pages into gart table. Adjusts
+ * pin_count and pin_size accordingly.
+ *
+ * Pinning means to lock pages in memory along with keeping them at a fixed
+ * offset. It is required when a buffer can not be moved, for example, when
+ * a display buffer is being scanned out.
+ *
+ * Compared with amdgpu_bo_pin(), this function gives more flexibility on
+ * where to pin a buffer if there are specific restrictions on where a buffer
+ * must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+ u64 min_offset, u64 max_offset)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ int r, i;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+ return -EPERM;
+
+ if (WARN_ON_ONCE(min_offset > max_offset))
+ return -EINVAL;
+
+ /* Check domain to be pinned to against preferred domains */
+ if (bo->preferred_domains & domain)
+ domain = bo->preferred_domains & domain;
+
+ /* A shared bo cannot be migrated to VRAM */
+ if (bo->tbo.base.import_attach) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ else
+ return -EINVAL;
+ }
+
+ if (bo->tbo.pin_count) {
+ uint32_t mem_type = bo->tbo.resource->mem_type;
+ uint32_t mem_flags = bo->tbo.resource->placement;
+
+ if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
+ return -EINVAL;
+
+ if ((mem_type == TTM_PL_VRAM) &&
+ (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) &&
+ !(mem_flags & TTM_PL_FLAG_CONTIGUOUS))
+ return -EINVAL;
+
+ ttm_bo_pin(&bo->tbo);
+
+ if (max_offset != 0) {
+ u64 domain_start = amdgpu_ttm_domain_start(adev,
+ mem_type);
+ WARN_ON_ONCE(max_offset <
+ (amdgpu_bo_gpu_offset(bo) - domain_start));
+ }
+
+ return 0;
+ }
+
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ domain = amdgpu_bo_get_preferred_domain(adev, domain);
+
+ if (bo->tbo.base.import_attach)
+ dma_buf_pin(bo->tbo.base.import_attach);
+
+ /* force to pin into visible video ram */
+ if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, domain);
+ for (i = 0; i < bo->placement.num_placement; i++) {
+ unsigned int fpfn, lpfn;
+
+ fpfn = min_offset >> PAGE_SHIFT;
+ lpfn = max_offset >> PAGE_SHIFT;
+
+ if (fpfn > bo->placements[i].fpfn)
+ bo->placements[i].fpfn = fpfn;
+ if (!bo->placements[i].lpfn ||
+ (lpfn && lpfn < bo->placements[i].lpfn))
+ bo->placements[i].lpfn = lpfn;
+ }
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(r)) {
+ dev_err(adev->dev, "%p pin failed\n", bo);
+ goto error;
+ }
+
+ ttm_bo_pin(&bo->tbo);
+
+ domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
+
+error:
+ return r;
+}
+
+/**
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ *
+ * A simple wrapper to amdgpu_bo_pin_restricted().
+ * Provides a simpler API for buffers that do not have any strict restrictions
+ * on where a buffer must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
+{
+ bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+}
+
+/**
+ * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unpinned
+ *
+ * Decreases the pin_count, and clears the flags if pin_count reaches 0.
+ * Changes placement and pin size accordingly.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+void amdgpu_bo_unpin(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ ttm_bo_unpin(&bo->tbo);
+ if (bo->tbo.pin_count)
+ return;
+
+ if (bo->tbo.base.import_attach)
+ dma_buf_unpin(bo->tbo.base.import_attach);
+
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
+
+}
+
+static const char * const amdgpu_vram_names[] = {
+ "UNKNOWN",
+ "GDDR1",
+ "DDR2",
+ "GDDR3",
+ "GDDR4",
+ "GDDR5",
+ "HBM",
+ "DDR3",
+ "DDR4",
+ "GDDR6",
+ "DDR5",
+ "LPDDR4",
+ "LPDDR5"
+};
+
+/**
+ * amdgpu_bo_init - initialize memory manager
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_init() to initialize amdgpu memory manager.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_init(struct amdgpu_device *adev)
+{
+ /* On A+A platform, VRAM can be mapped as WB */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ /* reserve PAT memory space to WC for VRAM */
+ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+
+ if (r) {
+ DRM_ERROR("Unable to set WC memtype for the aperture base\n");
+ return r;
+ }
+
+ /* Add an MTRR for the VRAM */
+ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+ }
+
+ DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
+ adev->gmc.mc_vram_size >> 20,
+ (unsigned long long)adev->gmc.aper_size >> 20);
+ DRM_INFO("RAM width %dbits %s\n",
+ adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
+ return amdgpu_ttm_init(adev);
+}
+
+/**
+ * amdgpu_bo_fini - tear down memory manager
+ * @adev: amdgpu device object
+ *
+ * Reverses amdgpu_bo_init() to tear down memory manager.
+ */
+void amdgpu_bo_fini(struct amdgpu_device *adev)
+{
+ int idx;
+
+ amdgpu_ttm_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+ drm_dev_exit(idx);
+ }
+}
+
+/**
+ * amdgpu_bo_set_tiling_flags - set tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: new flags
+ *
+ * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or
+ * kernel driver to set the tiling flags on a buffer.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_bo_user *ubo;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ if (adev->family <= AMDGPU_FAMILY_CZ &&
+ AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
+ return -EINVAL;
+
+ ubo = to_amdgpu_bo_user(bo);
+ ubo->tiling_flags = tiling_flags;
+ return 0;
+}
+
+/**
+ * amdgpu_bo_get_tiling_flags - get tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: returned flags
+ *
+ * Gets buffer object's tiling flags. Used by GEM ioctl or kernel driver to
+ * set the tiling flags on a buffer.
+ */
+void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
+{
+ struct amdgpu_bo_user *ubo;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ dma_resv_assert_held(bo->tbo.base.resv);
+ ubo = to_amdgpu_bo_user(bo);
+
+ if (tiling_flags)
+ *tiling_flags = ubo->tiling_flags;
+}
+
+/**
+ * amdgpu_bo_set_metadata - set metadata
+ * @bo: &amdgpu_bo buffer object
+ * @metadata: new metadata
+ * @metadata_size: size of the new metadata
+ * @flags: flags of the new metadata
+ *
+ * Sets buffer object's metadata, its size and flags.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
+ u32 metadata_size, uint64_t flags)
+{
+ struct amdgpu_bo_user *ubo;
+ void *buffer;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
+ if (!metadata_size) {
+ if (ubo->metadata_size) {
+ kfree(ubo->metadata);
+ ubo->metadata = NULL;
+ ubo->metadata_size = 0;
+ }
+ return 0;
+ }
+
+ if (metadata == NULL)
+ return -EINVAL;
+
+ buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ kfree(ubo->metadata);
+ ubo->metadata_flags = flags;
+ ubo->metadata = buffer;
+ ubo->metadata_size = metadata_size;
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_get_metadata - get metadata
+ * @bo: &amdgpu_bo buffer object
+ * @buffer: returned metadata
+ * @buffer_size: size of the buffer
+ * @metadata_size: size of the returned metadata
+ * @flags: flags of the returned metadata
+ *
+ * Gets buffer object's metadata, its size and flags. buffer_size shall not be
+ * less than metadata_size.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint64_t *flags)
+{
+ struct amdgpu_bo_user *ubo;
+
+ if (!buffer && !metadata_size)
+ return -EINVAL;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
+ if (metadata_size)
+ *metadata_size = ubo->metadata_size;
+
+ if (buffer) {
+ if (buffer_size < ubo->metadata_size)
+ return -EINVAL;
+
+ if (ubo->metadata_size)
+ memcpy(buffer, ubo->metadata, ubo->metadata_size);
+ }
+
+ if (flags)
+ *flags = ubo->metadata_flags;
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_move_notify - notification about a memory move
+ * @bo: pointer to a buffer object
+ * @evict: if this move is evicting the buffer from the graphics address space
+ * @new_mem: new information of the bufer object
+ *
+ * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
+ * bookkeeping.
+ * TTM driver callback which is called when ttm moves a buffer.
+ */
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo;
+ struct ttm_resource *old_mem = bo->resource;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+ amdgpu_vm_bo_invalidate(adev, abo, evict);
+
+ amdgpu_bo_kunmap(abo);
+
+ if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
+ bo->resource->mem_type != TTM_PL_SYSTEM)
+ dma_buf_move_notify(abo->tbo.base.dma_buf);
+
+ /* remember the eviction */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
+
+ /* update statistics */
+ if (!new_mem)
+ return;
+
+ /* move_notify is called before move happens */
+ trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
+}
+
+void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
+ struct amdgpu_mem_stats *stats)
+{
+ uint64_t size = amdgpu_bo_size(bo);
+ unsigned int domain;
+
+ /* Abort if the BO doesn't currently have a backing store */
+ if (!bo->tbo.resource)
+ return;
+
+ domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ stats->vram += size;
+ if (amdgpu_bo_in_cpu_visible_vram(bo))
+ stats->visible_vram += size;
+ break;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ stats->gtt += size;
+ break;
+ case AMDGPU_GEM_DOMAIN_CPU:
+ default:
+ stats->cpu += size;
+ break;
+ }
+
+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
+ stats->requested_vram += size;
+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ stats->requested_visible_vram += size;
+
+ if (domain != AMDGPU_GEM_DOMAIN_VRAM) {
+ stats->evicted_vram += size;
+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ stats->evicted_visible_vram += size;
+ }
+ } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
+ stats->requested_gtt += size;
+ }
+}
+
+/**
+ * amdgpu_bo_release_notify - notification about a BO being released
+ * @bo: pointer to a buffer object
+ *
+ * Wipes VRAM buffers whose contents should not be leaked before the
+ * memory is released.
+ */
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo *abo;
+ int r;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+
+ if (abo->kfd_bo)
+ amdgpu_amdkfd_release_notify(abo);
+
+ /* We only remove the fence if the resv has individualized. */
+ WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
+ && bo->base.resv != &bo->base._resv);
+ if (bo->base.resv == &bo->base._resv)
+ amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+
+ if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
+ adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
+ return;
+
+ if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
+ return;
+
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+ if (!WARN_ON(r)) {
+ amdgpu_bo_fence(abo, fence, false);
+ dma_fence_put(fence);
+ }
+
+ dma_resv_unlock(bo->base.resv);
+}
+
+/**
+ * amdgpu_bo_fault_reserve_notify - notification about a memory fault
+ * @bo: pointer to a buffer object
+ *
+ * Notifies the driver we are taking a fault on this BO and have reserved it,
+ * also performs bookkeeping.
+ * TTM driver callback for dealing with vm faults.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ int r;
+
+ /* Remember that this BO was accessed by the CPU */
+ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (bo->resource->mem_type != TTM_PL_VRAM)
+ return 0;
+
+ if (amdgpu_bo_in_cpu_visible_vram(abo))
+ return 0;
+
+ /* Can't move a pinned BO to visible VRAM */
+ if (abo->tbo.pin_count > 0)
+ return VM_FAULT_SIGBUS;
+
+ /* hurrah the memory is not visible ! */
+ atomic64_inc(&adev->num_vram_cpu_page_faults);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT);
+
+ /* Avoid costly evictions; only set GTT as a busy placement */
+ abo->placement.num_busy_placement = 1;
+ abo->placement.busy_placement = &abo->placements[1];
+
+ r = ttm_bo_validate(bo, &abo->placement, &ctx);
+ if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
+ return VM_FAULT_NOPAGE;
+ else if (unlikely(r))
+ return VM_FAULT_SIGBUS;
+
+ /* this should never happen */
+ if (bo->resource->mem_type == TTM_PL_VRAM &&
+ !amdgpu_bo_in_cpu_visible_vram(abo))
+ return VM_FAULT_SIGBUS;
+
+ ttm_bo_move_to_lru_tail_unlocked(bo);
+ return 0;
+}
+
+/**
+ * amdgpu_bo_fence - add fence to buffer object
+ *
+ * @bo: buffer object in question
+ * @fence: fence to add
+ * @shared: true if fence should be added shared
+ *
+ */
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
+ bool shared)
+{
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int r;
+
+ r = dma_resv_reserve_fences(resv, 1);
+ if (r) {
+ /* As last resort on OOM we block for the fence */
+ dma_fence_wait(fence, false);
+ return;
+ }
+
+ dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+ DMA_RESV_USAGE_WRITE);
+}
+
+/**
+ * amdgpu_bo_sync_wait_resv - Wait for BO reservation fences
+ *
+ * @adev: amdgpu device pointer
+ * @resv: reservation object to sync to
+ * @sync_mode: synchronization mode
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Extract the fences from the reservation object and waits for them to finish.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode, void *owner,
+ bool intr)
+{
+ struct amdgpu_sync sync;
+ int r;
+
+ amdgpu_sync_create(&sync);
+ amdgpu_sync_resv(adev, &sync, resv, sync_mode, owner);
+ r = amdgpu_sync_wait(&sync, intr);
+ amdgpu_sync_free(&sync);
+ return r;
+}
+
+/**
+ * amdgpu_bo_sync_wait - Wrapper for amdgpu_bo_sync_wait_resv
+ * @bo: buffer object to wait for
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Wrapper to wait for fences in a BO.
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ return amdgpu_bo_sync_wait_resv(adev, bo->tbo.base.resv,
+ AMDGPU_SYNC_NE_OWNER, owner, intr);
+}
+
+/**
+ * amdgpu_bo_gpu_offset - return GPU offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Note: object should either be pinned or reserved when calling this
+ * function, it might be useful to add check for this for debugging.
+ *
+ * Returns:
+ * current GPU offset of the object.
+ */
+u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
+{
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_SYSTEM);
+ WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&
+ !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel);
+ WARN_ON_ONCE(bo->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET);
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
+
+ return amdgpu_bo_gpu_offset_no_check(bo);
+}
+
+/**
+ * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Returns:
+ * current GPU offset of the object without raising warnings.
+ */
+u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset;
+
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+ amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
+ * amdgpu_bo_get_preferred_domain - get preferred domain
+ * @adev: amdgpu device object
+ * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
+ *
+ * Returns:
+ * Which of the allowed domains is preferred for allocating the BO.
+ */
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
+ uint32_t domain)
+{
+ if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+ ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ }
+ return domain;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+#define amdgpu_bo_print_flag(m, bo, flag) \
+ do { \
+ if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
+ seq_printf((m), " " #flag); \
+ } \
+ } while (0)
+
+/**
+ * amdgpu_bo_print_info - print BO info in debugfs file
+ *
+ * @id: Index or Id of the BO
+ * @bo: Requested BO for printing info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file
+ *
+ * Returns:
+ * Size of the BO in bytes.
+ */
+u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
+{
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *dma_buf;
+ const char *placement;
+ unsigned int pin_count;
+ u64 size;
+
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ unsigned int domain;
+ domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ if (amdgpu_bo_in_cpu_visible_vram(bo))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ placement = "GTT";
+ break;
+ case AMDGPU_GEM_DOMAIN_CPU:
+ default:
+ placement = "CPU";
+ break;
+ }
+ dma_resv_unlock(bo->tbo.base.resv);
+ } else {
+ placement = "UNKNOWN";
+ }
+
+ size = amdgpu_bo_size(bo);
+ seq_printf(m, "\t\t0x%08x: %12lld byte %s",
+ id, size, placement);
+
+ pin_count = READ_ONCE(bo->tbo.pin_count);
+ if (pin_count)
+ seq_printf(m, " pin count %d", pin_count);
+
+ dma_buf = READ_ONCE(bo->tbo.base.dma_buf);
+ attachment = READ_ONCE(bo->tbo.base.import_attach);
+
+ if (attachment)
+ seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino);
+ else if (dma_buf)
+ seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino);
+
+ amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+ amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
+ amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
+ amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
+ amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+ amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+ amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
+ seq_puts(m, "\n");
+
+ return size;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
new file mode 100644
index 000000000..d28e21bae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __AMDGPU_OBJECT_H__
+#define __AMDGPU_OBJECT_H__
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_res_cursor.h"
+
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
+
+#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+#define AMDGPU_BO_MAX_PLACEMENTS 3
+
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
+
+#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
+#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
+
+struct amdgpu_bo_param {
+ unsigned long size;
+ int byte_align;
+ u32 bo_ptr_size;
+ u32 domain;
+ u32 preferred_domain;
+ u64 flags;
+ enum ttm_bo_type type;
+ bool no_wait_gpu;
+ struct dma_resv *resv;
+ void (*destroy)(struct ttm_buffer_object *bo);
+ /* xcp partition number plus 1, 0 means any partition */
+ int8_t xcp_id_plus1;
+};
+
+/* bo virtual addresses in a vm */
+struct amdgpu_bo_va_mapping {
+ struct amdgpu_bo_va *bo_va;
+ struct list_head list;
+ struct rb_node rb;
+ uint64_t start;
+ uint64_t last;
+ uint64_t __subtree_last;
+ uint64_t offset;
+ uint64_t flags;
+};
+
+/* User space allocated BO in a VM */
+struct amdgpu_bo_va {
+ struct amdgpu_vm_bo_base base;
+
+ /* protected by bo being reserved */
+ unsigned ref_count;
+
+ /* all other members protected by the VM PD being reserved */
+ struct dma_fence *last_pt_update;
+
+ /* mappings for this bo_va */
+ struct list_head invalids;
+ struct list_head valids;
+
+ /* If the mappings are cleared or filled */
+ bool cleared;
+
+ bool is_xgmi;
+};
+
+struct amdgpu_bo {
+ /* Protected by tbo.reserved */
+ u32 preferred_domains;
+ u32 allowed_domains;
+ struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
+ struct ttm_placement placement;
+ struct ttm_buffer_object tbo;
+ struct ttm_bo_kmap_obj kmap;
+ u64 flags;
+ /* per VM structure for page tables and with virtual addresses */
+ struct amdgpu_vm_bo_base *vm_bo;
+ /* Constant after initialization */
+ struct amdgpu_bo *parent;
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+ struct kgd_mem *kfd_bo;
+
+ /*
+ * For GPUs with spatial partitioning, xcp partition number, -1 means
+ * any partition. For other ASICs without spatial partition, always 0
+ * for memory accounting.
+ */
+ int8_t xcp_id;
+};
+
+struct amdgpu_bo_user {
+ struct amdgpu_bo bo;
+ u64 tiling_flags;
+ u64 metadata_flags;
+ void *metadata;
+ u32 metadata_size;
+
+};
+
+struct amdgpu_bo_vm {
+ struct amdgpu_bo bo;
+ struct amdgpu_bo *shadow;
+ struct list_head shadow_list;
+ struct amdgpu_vm_bo_base entries[];
+};
+
+struct amdgpu_mem_stats {
+ /* current VRAM usage, includes visible VRAM */
+ uint64_t vram;
+ /* current visible VRAM usage */
+ uint64_t visible_vram;
+ /* current GTT usage */
+ uint64_t gtt;
+ /* current system memory usage */
+ uint64_t cpu;
+ /* sum of evicted buffers, includes visible VRAM */
+ uint64_t evicted_vram;
+ /* sum of evicted buffers due to CPU access */
+ uint64_t evicted_visible_vram;
+ /* how much userspace asked for, includes vis.VRAM */
+ uint64_t requested_vram;
+ /* how much userspace asked for */
+ uint64_t requested_visible_vram;
+ /* how much userspace asked for */
+ uint64_t requested_gtt;
+};
+
+static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
+{
+ return container_of(tbo, struct amdgpu_bo, tbo);
+}
+
+/**
+ * amdgpu_mem_type_to_domain - return domain corresponding to mem_type
+ * @mem_type: ttm memory type
+ *
+ * Returns corresponding domain of the ttm mem_type
+ */
+static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
+{
+ switch (mem_type) {
+ case TTM_PL_VRAM:
+ return AMDGPU_GEM_DOMAIN_VRAM;
+ case TTM_PL_TT:
+ return AMDGPU_GEM_DOMAIN_GTT;
+ case TTM_PL_SYSTEM:
+ return AMDGPU_GEM_DOMAIN_CPU;
+ case AMDGPU_PL_GDS:
+ return AMDGPU_GEM_DOMAIN_GDS;
+ case AMDGPU_PL_GWS:
+ return AMDGPU_GEM_DOMAIN_GWS;
+ case AMDGPU_PL_OA:
+ return AMDGPU_GEM_DOMAIN_OA;
+ case AMDGPU_PL_DOORBELL:
+ return AMDGPU_GEM_DOMAIN_DOORBELL;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_bo_reserve - reserve bo
+ * @bo: bo structure
+ * @no_intr: don't return -ERESTARTSYS on pending signal
+ *
+ * Returns:
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ dev_err(adev->dev, "%p reserve failed\n", bo);
+ return r;
+ }
+ return 0;
+}
+
+static inline void amdgpu_bo_unreserve(struct amdgpu_bo *bo)
+{
+ ttm_bo_unreserve(&bo->tbo);
+}
+
+static inline unsigned long amdgpu_bo_size(struct amdgpu_bo *bo)
+{
+ return bo->tbo.base.size;
+}
+
+static inline unsigned amdgpu_bo_ngpu_pages(struct amdgpu_bo *bo)
+{
+ return bo->tbo.base.size / AMDGPU_GPU_PAGE_SIZE;
+}
+
+static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo)
+{
+ return (bo->tbo.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE;
+}
+
+/**
+ * amdgpu_bo_mmap_offset - return mmap offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Returns mmap offset of the object.
+ */
+static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
+{
+ return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
+}
+
+/**
+ * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+ */
+static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_res_cursor cursor;
+
+ if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
+ return false;
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+ while (cursor.remaining) {
+ if (cursor.start < adev->gmc.visible_vram_size)
+ return true;
+
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return false;
+}
+
+/**
+ * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
+ */
+static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
+{
+ return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+}
+
+/**
+ * amdgpu_bo_encrypted - test if the BO is encrypted
+ * @bo: pointer to a buffer object
+ *
+ * Return true if the buffer object is encrypted, false otherwise.
+ */
+static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
+{
+ return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
+}
+
+/**
+ * amdgpu_bo_shadowed - check if the BO is shadowed
+ *
+ * @bo: BO to be tested.
+ *
+ * Returns:
+ * NULL if not shadowed or else return a BO pointer.
+ */
+static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
+{
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ return to_amdgpu_bo_vm(bo)->shadow;
+
+ return NULL;
+}
+
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
+
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr);
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **ubo_ptr);
+void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
+ void **cpu_addr);
+int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
+void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
+struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
+void amdgpu_bo_unref(struct amdgpu_bo **bo);
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
+int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+ u64 min_offset, u64 max_offset);
+void amdgpu_bo_unpin(struct amdgpu_bo *bo);
+int amdgpu_bo_init(struct amdgpu_device *adev);
+void amdgpu_bo_fini(struct amdgpu_device *adev);
+int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags);
+void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags);
+int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
+ uint32_t metadata_size, uint64_t flags);
+int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint64_t *flags);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem);
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
+vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
+ bool shared);
+int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode, void *owner,
+ bool intr);
+int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
+u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
+u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
+void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
+ struct amdgpu_mem_stats *stats);
+void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
+ struct dma_fence **fence);
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
+ uint32_t domain);
+
+/*
+ * sub allocation
+ */
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
+
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
+{
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
+}
+
+static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
+{
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
+}
+
+int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager,
+ unsigned size, u32 align, u32 domain);
+void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager);
+int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size);
+void amdgpu_sa_bo_free(struct amdgpu_device *adev,
+ struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
+#if defined(CONFIG_DEBUG_FS)
+void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
+ struct seq_file *m);
+u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m);
+#endif
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
+
+bool amdgpu_bo_support_uswc(u64 bo_flags);
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
new file mode 100644
index 000000000..0bb2466d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include <asm/div64.h>
+#include <linux/gcd.h>
+
+/**
+ * amdgpu_pll_reduce_ratio - fractional number reduction
+ *
+ * @nom: nominator
+ * @den: denominator
+ * @nom_min: minimum value for nominator
+ * @den_min: minimum value for denominator
+ *
+ * Find the greatest common divisor and apply it on both nominator and
+ * denominator, but make nominator and denominator are at least as large
+ * as their minimum values.
+ */
+static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den,
+ unsigned nom_min, unsigned den_min)
+{
+ unsigned tmp;
+
+ /* reduce the numbers to a simpler ratio */
+ tmp = gcd(*nom, *den);
+ *nom /= tmp;
+ *den /= tmp;
+
+ /* make sure nominator is large enough */
+ if (*nom < nom_min) {
+ tmp = DIV_ROUND_UP(nom_min, *nom);
+ *nom *= tmp;
+ *den *= tmp;
+ }
+
+ /* make sure the denominator is large enough */
+ if (*den < den_min) {
+ tmp = DIV_ROUND_UP(den_min, *den);
+ *nom *= tmp;
+ *den *= tmp;
+ }
+}
+
+/**
+ * amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation
+ *
+ * @adev: amdgpu_device pointer
+ * @nom: nominator
+ * @den: denominator
+ * @post_div: post divider
+ * @fb_div_max: feedback divider maximum
+ * @ref_div_max: reference divider maximum
+ * @fb_div: resulting feedback divider
+ * @ref_div: resulting reference divider
+ *
+ * Calculate feedback and reference divider for a given post divider. Makes
+ * sure we stay within the limits.
+ */
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int nom,
+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int ref_div_max,
+ unsigned int *fb_div, unsigned int *ref_div)
+{
+
+ /* limit reference * post divider to a maximum */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ ref_div_max = min(100 / post_div, ref_div_max);
+ else
+ ref_div_max = min(128 / post_div, ref_div_max);
+
+ /* get matching reference and feedback divider */
+ *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+ *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
+
+ /* limit fb divider to its maximum */
+ if (*fb_div > fb_div_max) {
+ *ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
+ *fb_div = fb_div_max;
+ }
+}
+
+/**
+ * amdgpu_pll_compute - compute PLL paramaters
+ *
+ * @adev: amdgpu_device pointer
+ * @pll: information about the PLL
+ * @freq: requested frequency
+ * @dot_clock_p: resulting pixel clock
+ * @fb_div_p: resulting feedback divider
+ * @frac_fb_div_p: fractional part of the feedback divider
+ * @ref_div_p: resulting reference divider
+ * @post_div_p: resulting reference divider
+ *
+ * Try to calculate the PLL parameters to generate the given frequency:
+ * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
+ */
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
+ u32 freq,
+ u32 *dot_clock_p,
+ u32 *fb_div_p,
+ u32 *frac_fb_div_p,
+ u32 *ref_div_p,
+ u32 *post_div_p)
+{
+ unsigned target_clock = pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV ?
+ freq : freq / 10;
+
+ unsigned fb_div_min, fb_div_max, fb_div;
+ unsigned post_div_min, post_div_max, post_div;
+ unsigned ref_div_min, ref_div_max, ref_div;
+ unsigned post_div_best, diff_best;
+ unsigned nom, den;
+
+ /* determine allowed feedback divider range */
+ fb_div_min = pll->min_feedback_div;
+ fb_div_max = pll->max_feedback_div;
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ fb_div_min *= 10;
+ fb_div_max *= 10;
+ }
+
+ /* determine allowed ref divider range */
+ if (pll->flags & AMDGPU_PLL_USE_REF_DIV)
+ ref_div_min = pll->reference_div;
+ else
+ ref_div_min = pll->min_ref_div;
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV &&
+ pll->flags & AMDGPU_PLL_USE_REF_DIV)
+ ref_div_max = pll->reference_div;
+ else
+ ref_div_max = pll->max_ref_div;
+
+ /* determine allowed post divider range */
+ if (pll->flags & AMDGPU_PLL_USE_POST_DIV) {
+ post_div_min = pll->post_div;
+ post_div_max = pll->post_div;
+ } else {
+ unsigned vco_min, vco_max;
+
+ if (pll->flags & AMDGPU_PLL_IS_LCD) {
+ vco_min = pll->lcd_pll_out_min;
+ vco_max = pll->lcd_pll_out_max;
+ } else {
+ vco_min = pll->pll_out_min;
+ vco_max = pll->pll_out_max;
+ }
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ vco_min *= 10;
+ vco_max *= 10;
+ }
+
+ post_div_min = vco_min / target_clock;
+ if ((target_clock * post_div_min) < vco_min)
+ ++post_div_min;
+ if (post_div_min < pll->min_post_div)
+ post_div_min = pll->min_post_div;
+
+ post_div_max = vco_max / target_clock;
+ if ((target_clock * post_div_max) > vco_max)
+ --post_div_max;
+ if (post_div_max > pll->max_post_div)
+ post_div_max = pll->max_post_div;
+ }
+
+ /* represent the searched ratio as fractional number */
+ nom = target_clock;
+ den = pll->reference_freq;
+
+ /* reduce the numbers to a simpler ratio */
+ amdgpu_pll_reduce_ratio(&nom, &den, fb_div_min, post_div_min);
+
+ /* now search for a post divider */
+ if (pll->flags & AMDGPU_PLL_PREFER_MINM_OVER_MAXP)
+ post_div_best = post_div_min;
+ else
+ post_div_best = post_div_max;
+ diff_best = ~0;
+
+ for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
+ unsigned diff;
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max,
+ ref_div_max, &fb_div, &ref_div);
+ diff = abs(target_clock - (pll->reference_freq * fb_div) /
+ (ref_div * post_div));
+
+ if (diff < diff_best || (diff == diff_best &&
+ !(pll->flags & AMDGPU_PLL_PREFER_MINM_OVER_MAXP))) {
+
+ post_div_best = post_div;
+ diff_best = diff;
+ }
+ }
+ post_div = post_div_best;
+
+ /* get the feedback and reference divider for the optimal value */
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max,
+ &fb_div, &ref_div);
+
+ /* reduce the numbers to a simpler ratio once more */
+ /* this also makes sure that the reference divider is large enough */
+ amdgpu_pll_reduce_ratio(&fb_div, &ref_div, fb_div_min, ref_div_min);
+
+ /* avoid high jitter with small fractional dividers */
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV && (fb_div % 10)) {
+ fb_div_min = max(fb_div_min, (9 - (fb_div % 10)) * 20 + 60);
+ if (fb_div < fb_div_min) {
+ unsigned tmp = DIV_ROUND_UP(fb_div_min, fb_div);
+ fb_div *= tmp;
+ ref_div *= tmp;
+ }
+ }
+
+ /* and finally save the result */
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ *fb_div_p = fb_div / 10;
+ *frac_fb_div_p = fb_div % 10;
+ } else {
+ *fb_div_p = fb_div;
+ *frac_fb_div_p = 0;
+ }
+
+ *dot_clock_p = ((pll->reference_freq * *fb_div_p * 10) +
+ (pll->reference_freq * *frac_fb_div_p)) /
+ (ref_div * post_div * 10);
+ *ref_div_p = ref_div;
+ *post_div_p = post_div;
+
+ DRM_DEBUG_KMS("%d - %d, pll dividers - fb: %d.%d ref: %d, post %d\n",
+ freq, *dot_clock_p * 10, *fb_div_p, *frac_fb_div_p,
+ ref_div, post_div);
+}
+
+/**
+ * amdgpu_pll_get_use_mask - look up a mask of which pplls are in use
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the mask of which PPLLs (Pixel PLLs) are in use.
+ */
+u32 amdgpu_pll_get_use_mask(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+ u32 pll_in_use = 0;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ pll_in_use |= (1 << test_amdgpu_crtc->pll_id);
+ }
+ return pll_in_use;
+}
+
+/**
+ * amdgpu_pll_get_shared_dp_ppll - return the PPLL used by another crtc for DP
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) used by another crtc/encoder which is
+ * also in DP mode. For DP, a single PPLL can be used for all DP
+ * crtcs/encoders.
+ */
+int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->encoder &&
+ ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(test_amdgpu_crtc->encoder))) {
+ /* for DP use the same PLL for all */
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ return test_amdgpu_crtc->pll_id;
+ }
+ }
+ return ATOM_PPLL_INVALID;
+}
+
+/**
+ * amdgpu_pll_get_shared_nondp_ppll - return the PPLL used by another non-DP crtc
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) used by another non-DP crtc/encoder which can
+ * be shared (i.e., same clock).
+ */
+int amdgpu_pll_get_shared_nondp_ppll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+ u32 adjusted_clock, test_adjusted_clock;
+
+ adjusted_clock = amdgpu_crtc->adjusted_clock;
+
+ if (adjusted_clock == 0)
+ return ATOM_PPLL_INVALID;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->encoder &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(test_amdgpu_crtc->encoder))) {
+ /* check if we are already driving this connector with another crtc */
+ if (test_amdgpu_crtc->connector == amdgpu_crtc->connector) {
+ /* if we are, return that pll */
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ return test_amdgpu_crtc->pll_id;
+ }
+ /* for non-DP check the clock */
+ test_adjusted_clock = test_amdgpu_crtc->adjusted_clock;
+ if ((crtc->mode.clock == test_crtc->mode.clock) &&
+ (adjusted_clock == test_adjusted_clock) &&
+ (amdgpu_crtc->ss_enabled == test_amdgpu_crtc->ss_enabled) &&
+ (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID))
+ return test_amdgpu_crtc->pll_id;
+ }
+ }
+ return ATOM_PPLL_INVALID;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
new file mode 100644
index 000000000..44a583d6c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PLL_H__
+#define __AMDGPU_PLL_H__
+
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
+ u32 freq,
+ u32 *dot_clock_p,
+ u32 *fb_div_p,
+ u32 *frac_fb_div_p,
+ u32 *ref_div_p,
+ u32 *post_div_p);
+u32 amdgpu_pll_get_use_mask(struct drm_crtc *crtc);
+int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc);
+int amdgpu_pll_get_shared_nondp_ppll(struct drm_crtc *crtc);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
new file mode 100644
index 000000000..6e91ea1de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/perf_event.h>
+#include <linux/init.h>
+#include "amdgpu.h"
+#include "amdgpu_pmu.h"
+
+#define PMU_NAME_SIZE 32
+#define NUM_FORMATS_AMDGPU_PMU 4
+#define NUM_FORMATS_DF_VEGA20 3
+#define NUM_EVENTS_DF_VEGA20 8
+#define NUM_EVENT_TYPES_VEGA20 1
+#define NUM_EVENTS_VEGA20_XGMI 2
+#define NUM_EVENTS_VEGA20_MAX NUM_EVENTS_VEGA20_XGMI
+#define NUM_EVENT_TYPES_ARCTURUS 1
+#define NUM_EVENTS_ARCTURUS_XGMI 6
+#define NUM_EVENTS_ARCTURUS_MAX NUM_EVENTS_ARCTURUS_XGMI
+
+struct amdgpu_pmu_event_attribute {
+ struct device_attribute attr;
+ const char *event_str;
+ unsigned int type;
+};
+
+/* record to keep track of pmu entry per pmu type per device */
+struct amdgpu_pmu_entry {
+ struct list_head entry;
+ struct amdgpu_device *adev;
+ struct pmu pmu;
+ unsigned int pmu_perf_type;
+ char *pmu_type_name;
+ char *pmu_file_prefix;
+ struct attribute_group fmt_attr_group;
+ struct amdgpu_pmu_event_attribute *fmt_attr;
+ struct attribute_group evt_attr_group;
+ struct amdgpu_pmu_event_attribute *evt_attr;
+};
+
+static ssize_t amdgpu_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_pmu_event_attribute *amdgpu_pmu_attr;
+
+ amdgpu_pmu_attr = container_of(attr, struct amdgpu_pmu_event_attribute,
+ attr);
+
+ if (!amdgpu_pmu_attr->type)
+ return sprintf(buf, "%s\n", amdgpu_pmu_attr->event_str);
+
+ return sprintf(buf, "%s,type=0x%x\n",
+ amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
+}
+
+static LIST_HEAD(amdgpu_pmu_list);
+
+
+struct amdgpu_pmu_attr {
+ const char *name;
+ const char *config;
+};
+
+struct amdgpu_pmu_type {
+ const unsigned int type;
+ const unsigned int num_of_type;
+};
+
+struct amdgpu_pmu_config {
+ struct amdgpu_pmu_attr *formats;
+ unsigned int num_formats;
+ struct amdgpu_pmu_attr *events;
+ unsigned int num_events;
+ struct amdgpu_pmu_type *types;
+ unsigned int num_types;
+};
+
+/*
+ * Events fall under two categories:
+ * - PMU typed
+ * Events in /sys/bus/event_source/devices/amdgpu_<pmu_type>_<dev_num> have
+ * performance counter operations handled by one IP <pmu_type>. Formats and
+ * events should be defined by <pmu_type>_<asic_type>_formats and
+ * <pmu_type>_<asic_type>_events respectively.
+ *
+ * - Event config typed
+ * Events in /sys/bus/event_source/devices/amdgpu_<dev_num> have performance
+ * counter operations that can be handled by multiple IPs dictated by their
+ * "type" format field. Formats and events should be defined by
+ * amdgpu_pmu_formats and <asic_type>_events respectively. Format field
+ * "type" is generated in amdgpu_pmu_event_show and defined in
+ * <asic_type>_event_config_types.
+ */
+
+static struct amdgpu_pmu_attr amdgpu_pmu_formats[NUM_FORMATS_AMDGPU_PMU] = {
+ { .name = "event", .config = "config:0-7" },
+ { .name = "instance", .config = "config:8-15" },
+ { .name = "umask", .config = "config:16-23"},
+ { .name = "type", .config = "config:56-63"}
+};
+
+/* Vega20 events */
+static struct amdgpu_pmu_attr vega20_events[NUM_EVENTS_VEGA20_MAX] = {
+ { .name = "xgmi_link0_data_outbound",
+ .config = "event=0x7,instance=0x46,umask=0x2" },
+ { .name = "xgmi_link1_data_outbound",
+ .config = "event=0x7,instance=0x47,umask=0x2" }
+};
+
+static struct amdgpu_pmu_type vega20_types[NUM_EVENT_TYPES_VEGA20] = {
+ { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ .num_of_type = NUM_EVENTS_VEGA20_XGMI }
+};
+
+static struct amdgpu_pmu_config vega20_config = {
+ .formats = amdgpu_pmu_formats,
+ .num_formats = ARRAY_SIZE(amdgpu_pmu_formats),
+ .events = vega20_events,
+ .num_events = ARRAY_SIZE(vega20_events),
+ .types = vega20_types,
+ .num_types = ARRAY_SIZE(vega20_types)
+};
+
+/* Vega20 data fabric (DF) events */
+static struct amdgpu_pmu_attr df_vega20_formats[NUM_FORMATS_DF_VEGA20] = {
+ { .name = "event", .config = "config:0-7" },
+ { .name = "instance", .config = "config:8-15" },
+ { .name = "umask", .config = "config:16-23"}
+};
+
+static struct amdgpu_pmu_attr df_vega20_events[NUM_EVENTS_DF_VEGA20] = {
+ { .name = "cake0_pcsout_txdata",
+ .config = "event=0x7,instance=0x46,umask=0x2" },
+ { .name = "cake1_pcsout_txdata",
+ .config = "event=0x7,instance=0x47,umask=0x2" },
+ { .name = "cake0_pcsout_txmeta",
+ .config = "event=0x7,instance=0x46,umask=0x4" },
+ { .name = "cake1_pcsout_txmeta",
+ .config = "event=0x7,instance=0x47,umask=0x4" },
+ { .name = "cake0_ftiinstat_reqalloc",
+ .config = "event=0xb,instance=0x46,umask=0x4" },
+ { .name = "cake1_ftiinstat_reqalloc",
+ .config = "event=0xb,instance=0x47,umask=0x4" },
+ { .name = "cake0_ftiinstat_rspalloc",
+ .config = "event=0xb,instance=0x46,umask=0x8" },
+ { .name = "cake1_ftiinstat_rspalloc",
+ .config = "event=0xb,instance=0x47,umask=0x8" }
+};
+
+static struct amdgpu_pmu_config df_vega20_config = {
+ .formats = df_vega20_formats,
+ .num_formats = ARRAY_SIZE(df_vega20_formats),
+ .events = df_vega20_events,
+ .num_events = ARRAY_SIZE(df_vega20_events),
+ .types = NULL,
+ .num_types = 0
+};
+
+/* Arcturus events */
+static struct amdgpu_pmu_attr arcturus_events[NUM_EVENTS_ARCTURUS_MAX] = {
+ { .name = "xgmi_link0_data_outbound",
+ .config = "event=0x7,instance=0x4b,umask=0x2" },
+ { .name = "xgmi_link1_data_outbound",
+ .config = "event=0x7,instance=0x4c,umask=0x2" },
+ { .name = "xgmi_link2_data_outbound",
+ .config = "event=0x7,instance=0x4d,umask=0x2" },
+ { .name = "xgmi_link3_data_outbound",
+ .config = "event=0x7,instance=0x4e,umask=0x2" },
+ { .name = "xgmi_link4_data_outbound",
+ .config = "event=0x7,instance=0x4f,umask=0x2" },
+ { .name = "xgmi_link5_data_outbound",
+ .config = "event=0x7,instance=0x50,umask=0x2" }
+};
+
+static struct amdgpu_pmu_type arcturus_types[NUM_EVENT_TYPES_ARCTURUS] = {
+ { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ .num_of_type = NUM_EVENTS_ARCTURUS_XGMI }
+};
+
+static struct amdgpu_pmu_config arcturus_config = {
+ .formats = amdgpu_pmu_formats,
+ .num_formats = ARRAY_SIZE(amdgpu_pmu_formats),
+ .events = arcturus_events,
+ .num_events = ARRAY_SIZE(arcturus_events),
+ .types = arcturus_types,
+ .num_types = ARRAY_SIZE(arcturus_types)
+};
+
+/* initialize perf counter */
+static int amdgpu_perf_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* update the hw_perf_event struct with config data */
+ hwc->config = event->attr.config;
+ hwc->config_base = AMDGPU_PMU_PERF_TYPE_NONE;
+
+ return 0;
+}
+
+/* start perf counter */
+static void amdgpu_perf_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ int target_cntr = 0;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ if (!(flags & PERF_EF_RELOAD)) {
+ target_cntr = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 0 /* unused */,
+ 1 /* add counter */);
+ if (target_cntr < 0)
+ break;
+
+ hwc->idx = target_cntr;
+ }
+
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config,
+ hwc->idx, 0);
+ break;
+ default:
+ break;
+ }
+
+ perf_event_update_userpage(event);
+}
+
+/* read perf counter */
+static void amdgpu_perf_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ u64 count, prev;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_get_count))
+ return;
+
+ prev = local64_read(&hwc->prev_count);
+ do {
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_get_count(pe->adev,
+ hwc->config, hwc->idx, &count);
+ break;
+ default:
+ count = 0;
+ break;
+ }
+ } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count));
+
+ local64_add(count - prev, &event->count);
+}
+
+/* stop perf counter */
+static void amdgpu_perf_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx,
+ 0);
+ break;
+ default:
+ break;
+ }
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ amdgpu_perf_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+/* add perf counter */
+static int amdgpu_perf_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int retval = 0, target_cntr;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return -EINVAL;
+
+ switch (pe->pmu_perf_type) {
+ case AMDGPU_PMU_PERF_TYPE_DF:
+ hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF;
+ break;
+ case AMDGPU_PMU_PERF_TYPE_ALL:
+ hwc->config_base = (hwc->config >>
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT) &
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK;
+ break;
+ }
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ target_cntr = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 0 /* unused */,
+ 1 /* add counter */);
+ if (target_cntr < 0)
+ retval = target_cntr;
+ else
+ hwc->idx = target_cntr;
+
+ break;
+ default:
+ return 0;
+ }
+
+ if (retval)
+ return retval;
+
+ if (flags & PERF_EF_START)
+ amdgpu_perf_start(event, PERF_EF_RELOAD);
+
+ return retval;
+}
+
+/* delete perf counter */
+static void amdgpu_perf_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
+ amdgpu_perf_stop(event, PERF_EF_UPDATE);
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx,
+ 1);
+ break;
+ default:
+ break;
+ }
+
+ perf_event_update_userpage(event);
+}
+
+static void amdgpu_pmu_create_event_attrs_by_type(
+ struct attribute_group *attr_group,
+ struct amdgpu_pmu_event_attribute *pmu_attr,
+ struct amdgpu_pmu_attr events[],
+ int s_offset,
+ int e_offset,
+ unsigned int type)
+{
+ int i;
+
+ pmu_attr += s_offset;
+
+ for (i = s_offset; i < e_offset; i++) {
+ attr_group->attrs[i] = &pmu_attr->attr.attr;
+ sysfs_attr_init(&pmu_attr->attr.attr);
+ pmu_attr->attr.attr.name = events[i].name;
+ pmu_attr->attr.attr.mode = 0444;
+ pmu_attr->attr.show = amdgpu_pmu_event_show;
+ pmu_attr->event_str = events[i].config;
+ pmu_attr->type = type;
+ pmu_attr++;
+ }
+}
+
+static void amdgpu_pmu_create_attrs(struct attribute_group *attr_group,
+ struct amdgpu_pmu_event_attribute *pmu_attr,
+ struct amdgpu_pmu_attr events[],
+ int num_events)
+{
+ amdgpu_pmu_create_event_attrs_by_type(attr_group, pmu_attr, events, 0,
+ num_events, AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE);
+}
+
+
+static int amdgpu_pmu_alloc_pmu_attrs(
+ struct attribute_group *fmt_attr_group,
+ struct amdgpu_pmu_event_attribute **fmt_attr,
+ struct attribute_group *evt_attr_group,
+ struct amdgpu_pmu_event_attribute **evt_attr,
+ struct amdgpu_pmu_config *config)
+{
+ *fmt_attr = kcalloc(config->num_formats, sizeof(**fmt_attr),
+ GFP_KERNEL);
+
+ if (!(*fmt_attr))
+ return -ENOMEM;
+
+ fmt_attr_group->attrs = kcalloc(config->num_formats + 1,
+ sizeof(*fmt_attr_group->attrs), GFP_KERNEL);
+
+ if (!fmt_attr_group->attrs)
+ goto err_fmt_attr_grp;
+
+ *evt_attr = kcalloc(config->num_events, sizeof(**evt_attr), GFP_KERNEL);
+
+ if (!(*evt_attr))
+ goto err_evt_attr;
+
+ evt_attr_group->attrs = kcalloc(config->num_events + 1,
+ sizeof(*evt_attr_group->attrs), GFP_KERNEL);
+
+ if (!evt_attr_group->attrs)
+ goto err_evt_attr_grp;
+
+ return 0;
+err_evt_attr_grp:
+ kfree(*evt_attr);
+err_evt_attr:
+ kfree(fmt_attr_group->attrs);
+err_fmt_attr_grp:
+ kfree(*fmt_attr);
+ return -ENOMEM;
+}
+
+/* init pmu tracking per pmu type */
+static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
+ struct amdgpu_pmu_config *config)
+{
+ const struct attribute_group *attr_groups[] = {
+ &pmu_entry->fmt_attr_group,
+ &pmu_entry->evt_attr_group,
+ NULL
+ };
+ char pmu_name[PMU_NAME_SIZE];
+ int ret = 0, total_num_events = 0;
+
+ pmu_entry->pmu = (struct pmu){
+ .event_init = amdgpu_perf_event_init,
+ .add = amdgpu_perf_add,
+ .del = amdgpu_perf_del,
+ .start = amdgpu_perf_start,
+ .stop = amdgpu_perf_stop,
+ .read = amdgpu_perf_read,
+ .task_ctx_nr = perf_invalid_context,
+ };
+
+ ret = amdgpu_pmu_alloc_pmu_attrs(&pmu_entry->fmt_attr_group,
+ &pmu_entry->fmt_attr,
+ &pmu_entry->evt_attr_group,
+ &pmu_entry->evt_attr,
+ config);
+
+ if (ret)
+ goto err_out;
+
+ amdgpu_pmu_create_attrs(&pmu_entry->fmt_attr_group, pmu_entry->fmt_attr,
+ config->formats, config->num_formats);
+
+ if (pmu_entry->pmu_perf_type == AMDGPU_PMU_PERF_TYPE_ALL) {
+ int i;
+
+ for (i = 0; i < config->num_types; i++) {
+ amdgpu_pmu_create_event_attrs_by_type(
+ &pmu_entry->evt_attr_group,
+ pmu_entry->evt_attr,
+ config->events,
+ total_num_events,
+ total_num_events +
+ config->types[i].num_of_type,
+ config->types[i].type);
+ total_num_events += config->types[i].num_of_type;
+ }
+ } else {
+ amdgpu_pmu_create_attrs(&pmu_entry->evt_attr_group,
+ pmu_entry->evt_attr,
+ config->events, config->num_events);
+ total_num_events = config->num_events;
+ }
+
+ pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+
+ if (!pmu_entry->pmu.attr_groups) {
+ ret = -ENOMEM;
+ goto err_attr_group;
+ }
+
+ snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix,
+ adev_to_drm(pmu_entry->adev)->primary->index);
+
+ ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1);
+
+ if (ret)
+ goto err_register;
+
+ if (pmu_entry->pmu_perf_type != AMDGPU_PMU_PERF_TYPE_ALL)
+ pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n",
+ pmu_entry->pmu_type_name, total_num_events);
+ else
+ pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);
+
+
+ list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
+
+ return 0;
+err_register:
+ kfree(pmu_entry->pmu.attr_groups);
+err_attr_group:
+ kfree(pmu_entry->fmt_attr_group.attrs);
+ kfree(pmu_entry->fmt_attr);
+ kfree(pmu_entry->evt_attr_group.attrs);
+ kfree(pmu_entry->evt_attr);
+err_out:
+ pr_warn("Error initializing AMDGPU %s PMUs.\n",
+ pmu_entry->pmu_type_name);
+ return ret;
+}
+
+/* destroy all pmu data associated with target device */
+void amdgpu_pmu_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_pmu_entry *pe, *temp;
+
+ list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
+ if (pe->adev != adev)
+ continue;
+ list_del(&pe->entry);
+ perf_pmu_unregister(&pe->pmu);
+ kfree(pe->pmu.attr_groups);
+ kfree(pe->fmt_attr_group.attrs);
+ kfree(pe->fmt_attr);
+ kfree(pe->evt_attr_group.attrs);
+ kfree(pe->evt_attr);
+ kfree(pe);
+ }
+}
+
+static struct amdgpu_pmu_entry *create_pmu_entry(struct amdgpu_device *adev,
+ unsigned int pmu_type,
+ char *pmu_type_name,
+ char *pmu_file_prefix)
+{
+ struct amdgpu_pmu_entry *pmu_entry;
+
+ pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL);
+
+ if (!pmu_entry)
+ return pmu_entry;
+
+ pmu_entry->adev = adev;
+ pmu_entry->fmt_attr_group.name = "format";
+ pmu_entry->fmt_attr_group.attrs = NULL;
+ pmu_entry->evt_attr_group.name = "events";
+ pmu_entry->evt_attr_group.attrs = NULL;
+ pmu_entry->pmu_perf_type = pmu_type;
+ pmu_entry->pmu_type_name = pmu_type_name;
+ pmu_entry->pmu_file_prefix = pmu_file_prefix;
+
+ return pmu_entry;
+}
+
+/* init amdgpu_pmu */
+int amdgpu_pmu_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+ struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
+ "DF", "amdgpu_df");
+
+ if (!pmu_entry_df)
+ return -ENOMEM;
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry_df,
+ &df_vega20_config);
+
+ if (ret) {
+ kfree(pmu_entry_df);
+ return ret;
+ }
+
+ pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL,
+ "", "amdgpu");
+
+ if (!pmu_entry) {
+ amdgpu_pmu_fini(adev);
+ return -ENOMEM;
+ }
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry,
+ &vega20_config);
+
+ if (ret) {
+ kfree(pmu_entry);
+ amdgpu_pmu_fini(adev);
+ return ret;
+ }
+
+ break;
+ case CHIP_ARCTURUS:
+ pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL,
+ "", "amdgpu");
+ if (!pmu_entry)
+ return -ENOMEM;
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry,
+ &arcturus_config);
+
+ if (ret) {
+ kfree(pmu_entry);
+ return -ENOMEM;
+ }
+
+ break;
+
+ default:
+ return 0;
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
new file mode 100644
index 000000000..6882dc48c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_PMU_H_
+#define _AMDGPU_PMU_H_
+
+/* PMU types. */
+enum amdgpu_pmu_perf_type {
+ AMDGPU_PMU_PERF_TYPE_NONE = 0,
+ AMDGPU_PMU_PERF_TYPE_DF,
+ AMDGPU_PMU_PERF_TYPE_ALL
+};
+
+/*
+ * PMU type AMDGPU_PMU_PERF_TYPE_ALL can hold events of different "type"
+ * configurations. Event config types are parsed from the 64-bit raw
+ * config (See EVENT_CONFIG_TYPE_SHIFT and EVENT_CONFIG_TYPE_MASK) and
+ * are registered into the HW perf events config_base.
+ *
+ * PMU types with only a single event configuration type
+ * (non-AMDGPU_PMU_PERF_TYPE_ALL) have their event config type auto generated
+ * when the performance counter is added.
+ */
+enum amdgpu_pmu_event_config_type {
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE = 0,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_DF,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_MAX
+};
+
+#define AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT 56
+#define AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK 0xff
+
+int amdgpu_pmu_init(struct amdgpu_device *adev);
+void amdgpu_pmu_fini(struct amdgpu_device *adev);
+
+#endif /* _AMDGPU_PMU_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
new file mode 100644
index 000000000..e8adfd0a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König, Felix Kuehling
+ */
+
+#include "amdgpu.h"
+
+/**
+ * DOC: mem_info_preempt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total amount of
+ * used preemptible memory.
+ * The file mem_info_preempt_used is used for this, and returns the current
+ * used size of the preemptible block, in bytes
+ */
+static ssize_t mem_info_preempt_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+static DEVICE_ATTR_RO(mem_info_preempt_used);
+
+/**
+ * amdgpu_preempt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: TTM memory object
+ *
+ * Dummy, just count the space used without allocating resources or any limit.
+ */
+static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ *res = kzalloc(sizeof(**res), GFP_KERNEL);
+ if (!*res)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, *res);
+ (*res)->start = AMDGPU_BO_INVALID_OFFSET;
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated GTT again.
+ */
+static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ ttm_resource_fini(man, res);
+ kfree(res);
+}
+
+static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
+ .alloc = amdgpu_preempt_mgr_new,
+ .free = amdgpu_preempt_mgr_del,
+};
+
+/**
+ * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ man->use_tt = true;
+ man->func = &amdgpu_preempt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, (1 << 30));
+
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
+ return ret;
+ }
+
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
new file mode 100644
index 000000000..429ef212c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -0,0 +1,3827 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_xgmi.h"
+#include "soc15_common.h"
+#include "psp_v3_1.h"
+#include "psp_v10_0.h"
+#include "psp_v11_0.h"
+#include "psp_v11_0_8.h"
+#include "psp_v12_0.h"
+#include "psp_v13_0.h"
+#include "psp_v13_0_4.h"
+
+#include "amdgpu_ras.h"
+#include "amdgpu_securedisplay.h"
+#include "amdgpu_atomfirmware.h"
+
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3)
+
+static int psp_load_smu_fw(struct psp_context *psp);
+static int psp_rap_terminate(struct psp_context *psp);
+static int psp_securedisplay_terminate(struct psp_context *psp);
+
+static int psp_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate 4k Page of Local Frame Buffer memory for ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Due to DF Cstate management centralized to PMFW, the firmware
+ * loading sequence will be updated as below:
+ * - Load KDB
+ * - Load SYS_DRV
+ * - Load tOS
+ * - Load PMFW
+ * - Setup TMR
+ * - Load other non-psp fw
+ * - Load ASD
+ * - Load XGMI/RAS/HDCP/DTM TA if any
+ *
+ * This new sequence is required for
+ * - Arcturus and onwards
+ */
+static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ psp->pmfw_centralized_cstate_management = false;
+ return;
+ }
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 7):
+ psp->pmfw_centralized_cstate_management = true;
+ break;
+ default:
+ psp->pmfw_centralized_cstate_management = false;
+ break;
+ }
+}
+
+static int psp_init_sriov_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int ret = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 0):
+ adev->virt.autoload_ucode_id = 0;
+ break;
+ case IP_VERSION(13, 0, 6):
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 10):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static int psp_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ psp_v3_1_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ psp_v10_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ psp_v11_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ adev->psp.sup_pd_fw_up = !amdgpu_sriov_vf(adev);
+ fallthrough;
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ psp_v11_0_set_psp_funcs(psp);
+ psp->autoload_supported = true;
+ break;
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
+ psp_v12_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ psp_v13_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = true;
+ break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ psp_v11_0_8_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ }
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = true;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ break;
+ case IP_VERSION(13, 0, 4):
+ psp_v13_0_4_set_psp_funcs(psp);
+ psp->autoload_supported = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ psp->adev = adev;
+
+ psp_check_pmfw_centralized_cstate_management(psp);
+
+ if (amdgpu_sriov_vf(adev))
+ return psp_init_sriov_microcode(psp);
+ else
+ return psp_init_microcode(psp);
+}
+
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
+{
+ amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+ mem_ctx->shared_bo = NULL;
+}
+
+static void psp_free_shared_bufs(struct psp_context *psp)
+{
+ void *tmr_buf;
+ void **pptr;
+
+ /* free TMR memory buffer */
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ psp->tmr_bo = NULL;
+
+ /* free xgmi shared memory */
+ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
+
+ /* free ras shared memory */
+ psp_ta_free_shared_buf(&psp->ras_context.context.mem_context);
+
+ /* free hdcp shared memory */
+ psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context);
+
+ /* free dtm shared memory */
+ psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context);
+
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+
+
+}
+
+static void psp_memory_training_fini(struct psp_context *psp)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ kfree(ctx->sys_cache);
+ ctx->sys_cache = NULL;
+}
+
+static int psp_memory_training_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+ DRM_DEBUG("memory training is not supported!\n");
+ return 0;
+ }
+
+ ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+ if (ctx->sys_cache == NULL) {
+ DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ ret = -ENOMEM;
+ goto Err_out;
+ }
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+ ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+ return 0;
+
+Err_out:
+ psp_memory_training_fini(psp);
+ return ret;
+}
+
+/*
+ * Helper funciton to query psp runtime database entry
+ *
+ * @adev: amdgpu_device pointer
+ * @entry_type: the type of psp runtime database entry
+ * @db_entry: runtime database entry pointer
+ *
+ * Return false if runtime database doesn't exit or entry is invalid
+ * or true if the specific database entry is found, and copy to @db_entry
+ */
+static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
+ enum psp_runtime_entry_type entry_type,
+ void *db_entry)
+{
+ uint64_t db_header_pos, db_dir_pos;
+ struct psp_runtime_data_header db_header = {0};
+ struct psp_runtime_data_directory db_dir = {0};
+ bool ret = false;
+ int i;
+
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
+ return false;
+
+ db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
+ db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
+
+ /* read runtime db header from vram */
+ amdgpu_device_vram_access(adev, db_header_pos, (uint32_t *)&db_header,
+ sizeof(struct psp_runtime_data_header), false);
+
+ if (db_header.cookie != PSP_RUNTIME_DB_COOKIE_ID) {
+ /* runtime db doesn't exist, exit */
+ dev_dbg(adev->dev, "PSP runtime database doesn't exist\n");
+ return false;
+ }
+
+ /* read runtime database entry from vram */
+ amdgpu_device_vram_access(adev, db_dir_pos, (uint32_t *)&db_dir,
+ sizeof(struct psp_runtime_data_directory), false);
+
+ if (db_dir.entry_count >= PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT) {
+ /* invalid db entry count, exit */
+ dev_warn(adev->dev, "Invalid PSP runtime database entry count\n");
+ return false;
+ }
+
+ /* look up for requested entry type */
+ for (i = 0; i < db_dir.entry_count && !ret; i++) {
+ if (db_dir.entry_list[i].entry_type == entry_type) {
+ switch (entry_type) {
+ case PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_boot_cfg_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database boot cfg entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_boot_cfg_entry), false);
+ ret = true;
+ break;
+ case PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_scpm_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database scpm entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_scpm_entry), false);
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int psp_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+ int ret;
+ struct psp_runtime_boot_cfg_entry boot_cfg_entry;
+ struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx;
+ struct psp_runtime_scpm_entry scpm_entry;
+
+ psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!psp->cmd) {
+ DRM_ERROR("Failed to allocate memory to command buffer!\n");
+ ret = -ENOMEM;
+ }
+
+ adev->psp.xgmi_context.supports_extended_data =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2);
+
+ memset(&scpm_entry, 0, sizeof(scpm_entry));
+ if ((psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
+ &scpm_entry)) &&
+ (scpm_entry.scpm_status != SCPM_DISABLE)) {
+ adev->scpm_enabled = true;
+ adev->scpm_status = scpm_entry.scpm_status;
+ } else {
+ adev->scpm_enabled = false;
+ adev->scpm_status = SCPM_DISABLE;
+ }
+
+ /* TODO: stop gpu driver services and print alarm if scpm is enabled with error status */
+
+ memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry));
+ if (psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG,
+ &boot_cfg_entry)) {
+ psp->boot_cfg_bitmask = boot_cfg_entry.boot_cfg_bitmask;
+ if ((psp->boot_cfg_bitmask) &
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING) {
+ /* If psp runtime database exists, then
+ * only enable two stage memory training
+ * when TWO_STAGE_DRAM_TRAINING bit is set
+ * in runtime database
+ */
+ mem_training_ctx->enable_mem_training = true;
+ }
+
+ } else {
+ /* If psp runtime database doesn't exist or is
+ * invalid, force enable two stage memory training
+ */
+ mem_training_ctx->enable_mem_training = true;
+ }
+
+ if (mem_training_ctx->enable_mem_training) {
+ ret = psp_memory_training_init(psp);
+ if (ret) {
+ DRM_ERROR("Failed to initialize memory training!\n");
+ return ret;
+ }
+
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ amdgpu_sriov_vf(adev) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr,
+ &psp->fence_buf);
+ if (ret)
+ goto failed1;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+ if (ret)
+ goto failed2;
+
+ return 0;
+
+failed2:
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ return ret;
+}
+
+static int psp_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd = psp->cmd;
+
+ psp_memory_training_fini(psp);
+
+ amdgpu_ucode_release(&psp->sos_fw);
+ amdgpu_ucode_release(&psp->asd_fw);
+ amdgpu_ucode_release(&psp->ta_fw);
+ amdgpu_ucode_release(&psp->cap_fw);
+ amdgpu_ucode_release(&psp->toc_fw);
+
+ kfree(cmd);
+ cmd = NULL;
+
+ psp_free_shared_bufs(psp);
+
+ if (psp->km_ring.ring_mem)
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &psp->km_ring.ring_mem_mc_addr,
+ (void **)&psp->km_ring.ring_mem);
+
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+ amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+
+ return 0;
+}
+
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, bool check_changed)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if (check_changed) {
+ if (val != reg_val)
+ return 0;
+ } else {
+ if ((val & mask) == reg_val)
+ return 0;
+ }
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, uint32_t msec_timeout)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < msec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ msleep(1);
+ }
+
+ return -ETIME;
+}
+
+static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
+{
+ switch (cmd_id) {
+ case GFX_CMD_ID_LOAD_TA:
+ return "LOAD_TA";
+ case GFX_CMD_ID_UNLOAD_TA:
+ return "UNLOAD_TA";
+ case GFX_CMD_ID_INVOKE_CMD:
+ return "INVOKE_CMD";
+ case GFX_CMD_ID_LOAD_ASD:
+ return "LOAD_ASD";
+ case GFX_CMD_ID_SETUP_TMR:
+ return "SETUP_TMR";
+ case GFX_CMD_ID_LOAD_IP_FW:
+ return "LOAD_IP_FW";
+ case GFX_CMD_ID_DESTROY_TMR:
+ return "DESTROY_TMR";
+ case GFX_CMD_ID_SAVE_RESTORE:
+ return "SAVE_RESTORE_IP_FW";
+ case GFX_CMD_ID_SETUP_VMR:
+ return "SETUP_VMR";
+ case GFX_CMD_ID_DESTROY_VMR:
+ return "DESTROY_VMR";
+ case GFX_CMD_ID_PROG_REG:
+ return "PROG_REG";
+ case GFX_CMD_ID_GET_FW_ATTESTATION:
+ return "GET_FW_ATTESTATION";
+ case GFX_CMD_ID_LOAD_TOC:
+ return "ID_LOAD_TOC";
+ case GFX_CMD_ID_AUTOLOAD_RLC:
+ return "AUTOLOAD_RLC";
+ case GFX_CMD_ID_BOOT_CFG:
+ return "BOOT_CFG";
+ default:
+ return "UNKNOWN CMD";
+ }
+}
+
+static int
+psp_cmd_submit_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
+{
+ int ret;
+ int index;
+ int timeout = 20000;
+ bool ras_intr = false;
+ bool skip_unsupport = false;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
+
+ memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
+
+ index = atomic_inc_return(&psp->fence_value);
+ ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
+ if (ret) {
+ atomic_dec(&psp->fence_value);
+ goto exit;
+ }
+
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
+ while (*((unsigned int *)psp->fence_buf) != index) {
+ if (--timeout == 0)
+ break;
+ /*
+ * Shouldn't wait for timeout when err_event_athub occurs,
+ * because gpu reset thread triggered and lock resource should
+ * be released for psp resume sequence.
+ */
+ ras_intr = amdgpu_ras_intr_triggered();
+ if (ras_intr)
+ break;
+ usleep_range(10, 100);
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
+ }
+
+ /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
+ skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
+ psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
+
+ memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
+
+ /* In some cases, psp response status is not 0 even there is no
+ * problem while the command is submitted. Some version of PSP FW
+ * doesn't write 0 to that field.
+ * So here we would like to only print a warning instead of an error
+ * during psp initialization to avoid breaking hw_init and it doesn't
+ * return -EINVAL.
+ */
+ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
+ if (ucode)
+ DRM_WARN("failed to load ucode %s(0x%X) ",
+ amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+ DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
+ /* If any firmware (including CAP) load fails under SRIOV, it should
+ * return failure to stop the VF from initializing.
+ * Also return failure in case of timeout
+ */
+ if ((ucode && amdgpu_sriov_vf(psp->adev)) || !timeout) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+
+ if (ucode) {
+ ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
+ ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
+ }
+
+exit:
+ return ret;
+}
+
+static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd;
+
+ mutex_lock(&psp->mutex);
+
+ memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
+
+ return cmd;
+}
+
+static void release_psp_cmd_buf(struct psp_context *psp)
+{
+ mutex_unlock(&psp->mutex);
+}
+
+static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
+ struct psp_gfx_cmd_resp *cmd,
+ uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t size = 0;
+ uint64_t tmr_pa = 0;
+
+ if (tmr_bo) {
+ size = amdgpu_bo_size(tmr_bo);
+ tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ }
+
+ if (amdgpu_sriov_vf(psp->adev))
+ cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
+ else
+ cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
+ cmd->cmd.cmd_setup_tmr.buf_size = size;
+ cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1;
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa);
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa);
+}
+
+static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t pri_buf_mc, uint32_t size)
+{
+ cmd->cmd_id = GFX_CMD_ID_LOAD_TOC;
+ cmd->cmd.cmd_load_toc.toc_phy_addr_lo = lower_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_phy_addr_hi = upper_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_size = size;
+}
+
+/* Issue LOAD TOC cmd to PSP to part toc and calculate tmr size needed */
+static int psp_load_toc(struct psp_context *psp,
+ uint32_t *tmr_size)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ /* Copy toc to psp firmware private buffer */
+ psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes);
+
+ psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (!ret)
+ *tmr_size = psp->cmd_buf_mem->resp.tmr_size;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static bool psp_boottime_tmr(struct psp_context *psp)
+{
+ switch (psp->adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 6):
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Set up Trusted Memory Region */
+static int psp_tmr_init(struct psp_context *psp)
+{
+ int ret = 0;
+ int tmr_size;
+ void *tmr_buf;
+ void **pptr;
+
+ /*
+ * According to HW engineer, they prefer the TMR address be "naturally
+ * aligned" , e.g. the start address be an integer divide of TMR size.
+ *
+ * Note: this memory need be reserved till the driver
+ * uninitializes.
+ */
+ tmr_size = PSP_TMR_SIZE(psp->adev);
+
+ /* For ASICs support RLC autoload, psp will parse the toc
+ * and calculate the total size of TMR needed
+ */
+ if (!amdgpu_sriov_vf(psp->adev) &&
+ psp->toc.start_addr &&
+ psp->toc.size_bytes &&
+ psp->fw_pri_buf) {
+ ret = psp_load_toc(psp, &tmr_size);
+ if (ret) {
+ DRM_ERROR("Failed to load toc\n");
+ return ret;
+ }
+ }
+
+ if (!psp->tmr_bo) {
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
+ PSP_TMR_ALIGNMENT,
+ AMDGPU_HAS_VRAM(psp->adev) ?
+ AMDGPU_GEM_DOMAIN_VRAM :
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+ }
+
+ return ret;
+}
+
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+ switch (psp->adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int psp_tmr_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
+ * Already set up by host driver.
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
+ if (psp->tmr_bo)
+ DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
+ struct psp_gfx_cmd_resp *cmd)
+{
+ if (amdgpu_sriov_vf(psp->adev))
+ cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
+ else
+ cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR;
+}
+
+static int psp_tmr_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /* skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID SRIOV,
+ * as TMR is not loaded at all
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_tmr_unload_cmd_buf(psp, cmd);
+ dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_tmr_terminate(struct psp_context *psp)
+{
+ return psp_tmr_unload(psp);
+}
+
+int psp_get_fw_attestation_records_addr(struct psp_context *psp,
+ uint64_t *output_ptr)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!output_ptr)
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ *output_ptr = ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_lo) +
+ ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32);
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg)
+{
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (!ret) {
+ *boot_cfg =
+ (cmd->resp.uresp.boot_cfg.boot_cfg & BOOT_CONFIG_GECC) ? 1 : 0;
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET;
+ cmd->cmd.boot_cfg.boot_config = boot_cfg;
+ cmd->cmd.boot_cfg.boot_config_valid = boot_cfg;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_rl_load(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!is_psp_fw_valid(psp->rl))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes);
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes;
+ cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+ cmd->cmd.cmd_spatial_part.mode = mode;
+
+ dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /* If PSP version doesn't match ASD version, asd loading will be failed.
+ * add workaround to bypass it for sriov now.
+ * TODO: add version check to make it common
+ */
+ if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
+ return 0;
+
+ psp->asd_context.mem_context.shared_mc_addr = 0;
+ psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
+ psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
+
+ ret = psp_ta_load(psp, &psp->asd_context);
+ if (!ret)
+ psp->asd_context.initialized = true;
+
+ return ret;
+}
+
+static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
+ cmd->cmd.cmd_unload_ta.session_id = session_id;
+}
+
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_unload_cmd_buf(cmd, context->session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->asd_context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->asd_context);
+ if (!ret)
+ psp->asd_context.initialized = false;
+
+ return ret;
+}
+
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t id, uint32_t value)
+{
+ cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+ cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+ cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret = 0;
+
+ if (reg >= PSP_REG_LAST)
+ return -EINVAL;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ DRM_ERROR("PSP failed to program reg id %d", reg);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t ta_bin_mc,
+ struct ta_context *context)
+{
+ cmd->cmd_id = context->ta_load_type;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
+
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo =
+ lower_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi =
+ upper_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size;
+}
+
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx)
+{
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for ta to host memory
+ */
+ return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &mem_ctx->shared_bo,
+ &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+}
+
+static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
+ cmd->cmd.cmd_invoke_cmd.session_id = session_id;
+ cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
+}
+
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, context->session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_ta_load(struct psp_context *psp, struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_copy_fw(psp, context->bin_desc.start_addr,
+ context->bin_desc.size_bytes);
+
+ psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ if (!ret)
+ context->session_id = cmd->resp.session_id;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->xgmi_context.context);
+}
+
+int psp_xgmi_terminate(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
+ (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return 0;
+
+ if (!psp->xgmi_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->xgmi_context.context);
+
+ psp->xgmi_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ if (!psp->ta_fw ||
+ !psp->xgmi_context.context.bin_desc.size_bytes ||
+ !psp->xgmi_context.context.bin_desc.start_addr)
+ return -ENOENT;
+
+ if (!load_ta)
+ goto invoke;
+
+ psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
+ psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->xgmi_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ /* Load XGMI TA */
+ ret = psp_ta_load(psp, &psp->xgmi_context.context);
+ if (!ret)
+ psp->xgmi_context.context.initialized = true;
+ else
+ return ret;
+
+invoke:
+ /* Initialize XGMI session */
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.context.mem_context.shared_buf);
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = set_extended_data;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
+
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+
+ return ret;
+}
+
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
+
+ /* Invoke xgmi ta to get hive id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
+
+ return 0;
+}
+
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
+
+ /* Invoke xgmi ta to get the node id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id;
+
+ return 0;
+}
+
+static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
+{
+ return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
+ psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6);
+}
+
+/*
+ * Chips that support extended topology information require the driver to
+ * reflect topology information in the opposite direction. This is
+ * because the TA has already exceeded its link record limit and if the
+ * TA holds bi-directional information, the driver would have to do
+ * multiple fetches instead of just two.
+ */
+static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
+ struct psp_xgmi_node_info node_info)
+{
+ struct amdgpu_device *mirror_adev;
+ struct amdgpu_hive_info *hive;
+ uint64_t src_node_id = psp->adev->gmc.xgmi.node_id;
+ uint64_t dst_node_id = node_info.node_id;
+ uint8_t dst_num_hops = node_info.num_hops;
+ uint8_t dst_num_links = node_info.num_links;
+
+ hive = amdgpu_get_xgmi_hive(psp->adev);
+ list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
+ struct psp_xgmi_topology_info *mirror_top_info;
+ int j;
+
+ if (mirror_adev->gmc.xgmi.node_id != dst_node_id)
+ continue;
+
+ mirror_top_info = &mirror_adev->psp.xgmi_context.top_info;
+ for (j = 0; j < mirror_top_info->num_nodes; j++) {
+ if (mirror_top_info->nodes[j].node_id != src_node_id)
+ continue;
+
+ mirror_top_info->nodes[j].num_hops = dst_num_hops;
+ /*
+ * prevent 0 num_links value re-reflection since reflection
+ * criteria is based on num_hops (direct or indirect).
+ *
+ */
+ if (dst_num_links)
+ mirror_top_info->nodes[j].num_links = dst_num_links;
+
+ break;
+ }
+
+ break;
+ }
+
+ amdgpu_put_xgmi_hive(hive);
+}
+
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
+ int i;
+ int ret;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = get_extended_data;
+
+ /* Fill in the shared memory with topology information as input */
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to get the topology information */
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
+ if (ret)
+ return ret;
+
+ /* Read the output topology information from the shared memory */
+ topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
+ topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
+ for (i = 0; i < topology->num_nodes; i++) {
+ /* extended data will either be 0 or equal to non-extended data */
+ if (topology_info_output->nodes[i].num_hops)
+ topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
+
+ /* non-extended data gets everything here so no need to update */
+ if (!get_extended_data) {
+ topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
+ topology->nodes[i].is_sharing_enabled =
+ topology_info_output->nodes[i].is_sharing_enabled;
+ topology->nodes[i].sdma_engine =
+ topology_info_output->nodes[i].sdma_engine;
+ }
+
+ }
+
+ /* Invoke xgmi ta again to get the link information */
+ if (psp_xgmi_peer_link_info_supported(psp)) {
+ struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
+ bool requires_reflection =
+ (psp->xgmi_context.supports_extended_data && get_extended_data) ||
+ psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6);
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS);
+
+ if (ret)
+ return ret;
+
+ link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
+ for (i = 0; i < topology->num_nodes; i++) {
+ /* accumulate num_links on extended data */
+ topology->nodes[i].num_links = get_extended_data ?
+ topology->nodes[i].num_links +
+ link_info_output->nodes[i].num_links :
+ ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links :
+ link_info_output->nodes[i].num_links);
+
+ /* reflect the topology information for bi-directionality */
+ if (requires_reflection && topology->nodes[i].num_hops)
+ psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
+ }
+ }
+
+ return 0;
+}
+
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ int i;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = 1;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to set topology information */
+ return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
+}
+
+// ras begin
+static void psp_ras_ta_check_status(struct psp_context *psp)
+{
+ struct ta_ras_shared_memory *ras_cmd =
+ (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+
+ switch (ras_cmd->ras_status) {
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported ip\n");
+ break;
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported error injection\n");
+ break;
+ case TA_RAS_STATUS__SUCCESS:
+ break;
+ case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
+ if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: Inject error to critical region is not allowed\n");
+ break;
+ default:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ break;
+ }
+}
+
+int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ int ret;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->ras_context.context);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
+ DRM_WARN("RAS: Unsupported Interface");
+ return -EINVAL;
+ }
+
+ if (!ret) {
+ if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
+ dev_warn(psp->adev->dev, "ECC switch disabled\n");
+
+ ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
+ } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ dev_warn(psp->adev->dev,
+ "RAS internal register access blocked\n");
+
+ psp_ras_ta_check_status(psp);
+ }
+
+ return ret;
+}
+
+int psp_ras_enable_features(struct psp_context *psp,
+ union ta_ras_cmd_input *info, bool enable)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ int ret;
+
+ if (!psp->ras_context.context.initialized)
+ return -EINVAL;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ if (enable)
+ ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
+ else
+ ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
+
+ ras_cmd->ras_in_message = *info;
+
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ if (ret)
+ return -EINVAL;
+
+ return 0;
+}
+
+int psp_ras_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->ras_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->ras_context.context);
+
+ psp->ras_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_ras_initialize(struct psp_context *psp)
+{
+ int ret;
+ uint32_t boot_cfg = 0xFF;
+ struct amdgpu_device *adev = psp->adev;
+ struct ta_ras_shared_memory *ras_cmd;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (!adev->psp.ras_context.context.bin_desc.size_bytes ||
+ !adev->psp.ras_context.context.bin_desc.start_addr) {
+ dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n");
+ return 0;
+ }
+
+ if (amdgpu_atomfirmware_dynamic_boot_config_supported(adev)) {
+ /* query GECC enablement status from boot config
+ * boot_cfg: 1: GECC is enabled or 0: GECC is disabled
+ */
+ ret = psp_boot_config_get(adev, &boot_cfg);
+ if (ret)
+ dev_warn(adev->dev, "PSP get boot config failed\n");
+
+ if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (!boot_cfg) {
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or boot_config_get call
+ * is failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
+ } else {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if failed to
+ * get boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
+ }
+ }
+
+ psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;
+ psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->ras_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ if (amdgpu_ras_is_poison_mode_supported(adev))
+ ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+ ras_cmd->ras_in_message.init_flags.xcc_mask =
+ adev->gfx.xcc_mask;
+ ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+
+ ret = psp_ta_load(psp, &psp->ras_context.context);
+
+ if (!ret && !ras_cmd->ras_status)
+ psp->ras_context.context.initialized = true;
+ else {
+ if (ras_cmd->ras_status)
+ dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+
+ /* fail to load RAS TA */
+ psp->ras_context.context.initialized = false;
+ }
+
+ return ret;
+}
+
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+ uint32_t dev_mask;
+
+ if (!psp->ras_context.context.initialized)
+ return -EINVAL;
+
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ case TA_RAS_BLOCK__VCN:
+ case TA_RAS_BLOCK__JPEG:
+ dev_mask = GET_MASK(VCN, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
+
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
+ ras_cmd->ras_in_message.trigger_error = *info;
+
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ if (ret)
+ return -EINVAL;
+
+ /* If err_event_athub occurs error inject was successful, however
+ * return status from TA is no long reliable
+ */
+ if (amdgpu_ras_intr_triggered())
+ return 0;
+
+ if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+ return -EACCES;
+ else if (ras_cmd->ras_status)
+ return -EINVAL;
+
+ return 0;
+}
+// ras end
+
+// HDCP start
+static int psp_hdcp_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.bin_desc.size_bytes ||
+ !psp->hdcp_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->hdcp_context.context.mem_context.shared_mem_size = PSP_HDCP_SHARED_MEM_SIZE;
+ psp->hdcp_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->hdcp_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->hdcp_context.context);
+ if (!ret) {
+ psp->hdcp_context.context.initialized = true;
+ mutex_init(&psp->hdcp_context.mutex);
+ }
+
+ return ret;
+}
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
+}
+
+static int psp_hdcp_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->hdcp_context.context);
+
+ psp->hdcp_context.context.initialized = false;
+
+ return ret;
+}
+// HDCP end
+
+// DTM start
+static int psp_dtm_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.bin_desc.size_bytes ||
+ !psp->dtm_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->dtm_context.context.mem_context.shared_mem_size = PSP_DTM_SHARED_MEM_SIZE;
+ psp->dtm_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->dtm_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->dtm_context.context);
+ if (!ret) {
+ psp->dtm_context.context.initialized = true;
+ mutex_init(&psp->dtm_context.mutex);
+ }
+
+ return ret;
+}
+
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
+}
+
+static int psp_dtm_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->dtm_context.context);
+
+ psp->dtm_context.context.initialized = false;
+
+ return ret;
+}
+// DTM end
+
+// RAP start
+static int psp_rap_initialize(struct psp_context *psp)
+{
+ int ret;
+ enum ta_rap_status status = TA_RAP_STATUS__SUCCESS;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->rap_context.context.bin_desc.size_bytes ||
+ !psp->rap_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->rap_context.context.mem_context.shared_mem_size = PSP_RAP_SHARED_MEM_SIZE;
+ psp->rap_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->rap_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->rap_context.context);
+ if (!ret) {
+ psp->rap_context.context.initialized = true;
+ mutex_init(&psp->rap_context.mutex);
+ } else
+ return ret;
+
+ ret = psp_rap_invoke(psp, TA_CMD_RAP__INITIALIZE, &status);
+ if (ret || status != TA_RAP_STATUS__SUCCESS) {
+ psp_rap_terminate(psp);
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n",
+ ret, status);
+
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_rap_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ if (!psp->rap_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->rap_context.context);
+
+ psp->rap_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status)
+{
+ struct ta_rap_shared_memory *rap_cmd;
+ int ret = 0;
+
+ if (!psp->rap_context.context.initialized)
+ return 0;
+
+ if (ta_cmd_id != TA_CMD_RAP__INITIALIZE &&
+ ta_cmd_id != TA_CMD_RAP__VALIDATE_L0)
+ return -EINVAL;
+
+ mutex_lock(&psp->rap_context.mutex);
+
+ rap_cmd = (struct ta_rap_shared_memory *)
+ psp->rap_context.context.mem_context.shared_buf;
+ memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory));
+
+ rap_cmd->cmd_id = ta_cmd_id;
+ rap_cmd->validation_method_id = METHOD_A;
+
+ ret = psp_ta_invoke(psp, rap_cmd->cmd_id, &psp->rap_context.context);
+ if (ret)
+ goto out_unlock;
+
+ if (status)
+ *status = rap_cmd->rap_status;
+
+out_unlock:
+ mutex_unlock(&psp->rap_context.mutex);
+
+ return ret;
+}
+// RAP end
+
+/* securedisplay start */
+static int psp_securedisplay_initialize(struct psp_context *psp)
+{
+ int ret;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
+ !psp->securedisplay_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->securedisplay_context.context.mem_context.shared_mem_size =
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE;
+ psp->securedisplay_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->securedisplay_context.context.initialized) {
+ ret = psp_ta_init_shared_buf(psp,
+ &psp->securedisplay_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->securedisplay_context.context);
+ if (!ret) {
+ psp->securedisplay_context.context.initialized = true;
+ mutex_init(&psp->securedisplay_context.mutex);
+ } else
+ return ret;
+
+ mutex_lock(&psp->securedisplay_context.mutex);
+
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+ mutex_unlock(&psp->securedisplay_context.mutex);
+
+ if (ret) {
+ psp_securedisplay_terminate(psp);
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+ dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n");
+ return -EINVAL;
+ }
+
+ if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS) {
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ dev_err(psp->adev->dev, "SECUREDISPLAY: query securedisplay TA failed. ret 0x%x\n",
+ securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
+ }
+
+ return 0;
+}
+
+static int psp_securedisplay_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO:bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->securedisplay_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->securedisplay_context.context);
+
+ psp->securedisplay_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ int ret;
+
+ if (!psp->securedisplay_context.context.initialized)
+ return -EINVAL;
+
+ if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ return -EINVAL;
+
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
+
+ return ret;
+}
+/* SECUREDISPLAY end */
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+ ret = psp->funcs->wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_hw_start(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((is_psp_fw_valid(psp->kdb)) &&
+ (psp->funcs->bootloader_load_kdb != NULL)) {
+ ret = psp_bootloader_load_kdb(psp);
+ if (ret) {
+ DRM_ERROR("PSP load kdb failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spl)) &&
+ (psp->funcs->bootloader_load_spl != NULL)) {
+ ret = psp_bootloader_load_spl(psp);
+ if (ret) {
+ DRM_ERROR("PSP load spl failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->sys)) &&
+ (psp->funcs->bootloader_load_sysdrv != NULL)) {
+ ret = psp_bootloader_load_sysdrv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load sys drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->soc_drv)) &&
+ (psp->funcs->bootloader_load_soc_drv != NULL)) {
+ ret = psp_bootloader_load_soc_drv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load soc drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->intf_drv)) &&
+ (psp->funcs->bootloader_load_intf_drv != NULL)) {
+ ret = psp_bootloader_load_intf_drv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load intf drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->dbg_drv)) &&
+ (psp->funcs->bootloader_load_dbg_drv != NULL)) {
+ ret = psp_bootloader_load_dbg_drv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load dbg drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ras_drv)) &&
+ (psp->funcs->bootloader_load_ras_drv != NULL)) {
+ ret = psp_bootloader_load_ras_drv(psp);
+ if (ret) {
+ DRM_ERROR("PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->sos)) &&
+ (psp->funcs->bootloader_load_sos != NULL)) {
+ ret = psp_bootloader_load_sos(psp);
+ if (ret) {
+ DRM_ERROR("PSP load sos failed!\n");
+ return ret;
+ }
+ }
+ }
+
+ ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ DRM_ERROR("PSP create ring failed!\n");
+ return ret;
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ goto skip_pin_bo;
+
+ if (!psp_boottime_tmr(psp)) {
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ DRM_ERROR("PSP tmr init failed!\n");
+ return ret;
+ }
+ }
+
+skip_pin_bo:
+ /*
+ * For ASICs with DF Cstate management centralized
+ * to PMFW, TMR setup should be performed after PMFW
+ * loaded and before other non-psp firmware loaded.
+ */
+ if (psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load tmr failed!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
+ enum psp_gfx_fw_type *type)
+{
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_CAP:
+ *type = GFX_FW_TYPE_CAP;
+ break;
+ case AMDGPU_UCODE_ID_SDMA0:
+ *type = GFX_FW_TYPE_SDMA0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA1:
+ *type = GFX_FW_TYPE_SDMA1;
+ break;
+ case AMDGPU_UCODE_ID_SDMA2:
+ *type = GFX_FW_TYPE_SDMA2;
+ break;
+ case AMDGPU_UCODE_ID_SDMA3:
+ *type = GFX_FW_TYPE_SDMA3;
+ break;
+ case AMDGPU_UCODE_ID_SDMA4:
+ *type = GFX_FW_TYPE_SDMA4;
+ break;
+ case AMDGPU_UCODE_ID_SDMA5:
+ *type = GFX_FW_TYPE_SDMA5;
+ break;
+ case AMDGPU_UCODE_ID_SDMA6:
+ *type = GFX_FW_TYPE_SDMA6;
+ break;
+ case AMDGPU_UCODE_ID_SDMA7:
+ *type = GFX_FW_TYPE_SDMA7;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ *type = GFX_FW_TYPE_CP_MES;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ *type = GFX_FW_TYPE_MES_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ *type = GFX_FW_TYPE_CP_MES_KIQ;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ *type = GFX_FW_TYPE_MES_KIQ_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ *type = GFX_FW_TYPE_CP_CE;
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ *type = GFX_FW_TYPE_CP_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ *type = GFX_FW_TYPE_CP_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME1;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME2;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ *type = GFX_FW_TYPE_RLC_P;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ *type = GFX_FW_TYPE_RLC_V;
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ *type = GFX_FW_TYPE_RLC_G;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ *type = GFX_FW_TYPE_RLC_IRAM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ *type = GFX_FW_TYPE_RLC_DRAM_BOOT;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE0_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE1_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE2_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE3_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ *type = GFX_FW_TYPE_SMU;
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ *type = GFX_FW_TYPE_PPTABLE;
+ break;
+ case AMDGPU_UCODE_ID_UVD:
+ *type = GFX_FW_TYPE_UVD;
+ break;
+ case AMDGPU_UCODE_ID_UVD1:
+ *type = GFX_FW_TYPE_UVD1;
+ break;
+ case AMDGPU_UCODE_ID_VCE:
+ *type = GFX_FW_TYPE_VCE;
+ break;
+ case AMDGPU_UCODE_ID_VCN:
+ *type = GFX_FW_TYPE_VCN;
+ break;
+ case AMDGPU_UCODE_ID_VCN1:
+ *type = GFX_FW_TYPE_VCN1;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ *type = GFX_FW_TYPE_DMCU_ERAM;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ *type = GFX_FW_TYPE_DMCU_ISR;
+ break;
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ *type = GFX_FW_TYPE_VCN0_RAM;
+ break;
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ *type = GFX_FW_TYPE_VCN1_RAM;
+ break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ *type = GFX_FW_TYPE_DMUB;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH1;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ *type = GFX_FW_TYPE_IMU_I;
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ *type = GFX_FW_TYPE_IMU_D;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ *type = GFX_FW_TYPE_RS64_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ *type = GFX_FW_TYPE_RS64_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ *type = GFX_FW_TYPE_RS64_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P2_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
+ break;
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void psp_print_fw_hdr(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ struct amdgpu_device *adev = psp->adev;
+ struct common_firmware_header *hdr;
+
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
+ break;
+ default:
+ break;
+ }
+}
+
+static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd)
+{
+ int ret;
+ uint64_t fw_mem_mc_addr = ucode->mc_addr;
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
+
+ ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
+ if (ret)
+ DRM_ERROR("Unknown firmware type\n");
+
+ return ret;
+}
+
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ int ret = 0;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
+ if (!ret) {
+ ret = psp_cmd_submit_buf(psp, ucode, cmd,
+ psp->fence_buf_mc_addr);
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_load_smu_fw(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ struct amdgpu_ras *ras = psp->ras_context.ras;
+
+ /*
+ * Skip SMU FW reloading in case of using BACO for runpm only,
+ * as SMU is always alive.
+ */
+ if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+ return 0;
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if ((amdgpu_in_reset(adev) &&
+ ras && adev->ras_enabled &&
+ (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
+ adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
+ ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
+ if (ret)
+ DRM_WARN("Failed to set MP1 state prepare for reload\n");
+ }
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+
+ if (ret)
+ DRM_ERROR("PSP load smu failed!\n");
+
+ return ret;
+}
+
+static bool fw_load_skip_check(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ if (!ucode->fw || !ucode->ucode_size)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ (psp_smu_reload_quirk(psp) ||
+ psp->autoload_supported ||
+ psp->pmfw_centralized_cstate_management))
+ return true;
+
+ if (amdgpu_sriov_vf(psp->adev) &&
+ amdgpu_virt_fw_load_skip_check(psp->adev, ucode->ucode_id))
+ return true;
+
+ if (psp->autoload_supported &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
+ /* skip mec JT when autoload is enabled */
+ return true;
+
+ return false;
+}
+
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count)
+{
+ int ret = 0, i;
+ struct amdgpu_firmware_info *ucode;
+
+ for (i = 0; i < ucode_count; ++i) {
+ ucode = ucode_list[i];
+ psp_print_fw_hdr(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+ }
+ return ret;
+}
+
+static int psp_load_non_psp_fw(struct psp_context *psp)
+{
+ int i, ret;
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->autoload_supported &&
+ !psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ !fw_load_skip_check(psp, ucode)) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ continue;
+ }
+
+ if (fw_load_skip_check(psp, ucode))
+ continue;
+
+ if (psp->autoload_supported &&
+ (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7) ||
+ adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 11) ||
+ adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 12)) &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
+ /* PSP only receive one SDMA fw for sienna_cichlid,
+ * as all four sdma fw are same
+ */
+ continue;
+
+ psp_print_fw_hdr(psp, ucode);
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+
+ /* Start rlc autoload after psp recieved all the gfx firmware */
+ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+ adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
+ ret = psp_rlc_autoload_start(psp);
+ if (ret) {
+ DRM_ERROR("Failed to start rlc autoload\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int psp_load_fw(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ /* should not destroy ring, only stop */
+ psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ } else {
+ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+
+ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ DRM_ERROR("PSP ring init failed!\n");
+ goto failed;
+ }
+ }
+
+ ret = psp_hw_start(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_non_psp_fw(psp);
+ if (ret)
+ goto failed1;
+
+ ret = psp_asd_initialize(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ goto failed1;
+ }
+
+ ret = psp_rl_load(adev);
+ if (ret) {
+ DRM_ERROR("PSP load RL failed!\n");
+ goto failed1;
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+
+ ret = psp_rap_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAP: Failed to initialize RAP\n");
+
+ ret = psp_securedisplay_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "SECUREDISPLAY: Failed to initialize SECUREDISPLAY\n");
+ }
+
+ return 0;
+
+failed1:
+ psp_free_shared_bufs(psp);
+failed:
+ /*
+ * all cleanup jobs (xgmi terminate, ras terminate,
+ * ring destroy, cmd/fence/fw buffers destory,
+ * psp->cmd destory) are delayed to psp_hw_fini
+ */
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
+ return ret;
+}
+
+static int psp_hw_init(void *handle)
+{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ mutex_lock(&adev->firmware.mutex);
+ /*
+ * This sequence is just used on hw_init only once, no need on
+ * resume.
+ */
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_fw(adev);
+ if (ret) {
+ DRM_ERROR("PSP firmware loading failed\n");
+ goto failed;
+ }
+
+ mutex_unlock(&adev->firmware.mutex);
+ return 0;
+
+failed:
+ adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+ mutex_unlock(&adev->firmware.mutex);
+ return -EINVAL;
+}
+
+static int psp_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+
+ if (psp->ta_fw) {
+ psp_ras_terminate(psp);
+ psp_securedisplay_terminate(psp);
+ psp_rap_terminate(psp);
+ psp_dtm_terminate(psp);
+ psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
+ }
+
+ psp_asd_terminate(psp);
+ psp_tmr_terminate(psp);
+
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
+
+ return 0;
+}
+
+static int psp_suspend(void *handle)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ psp->xgmi_context.context.initialized) {
+ ret = psp_xgmi_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate xgmi ta\n");
+ goto out;
+ }
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate ras ta\n");
+ goto out;
+ }
+ ret = psp_hdcp_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate hdcp ta\n");
+ goto out;
+ }
+ ret = psp_dtm_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate dtm ta\n");
+ goto out;
+ }
+ ret = psp_rap_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate rap ta\n");
+ goto out;
+ }
+ ret = psp_securedisplay_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate securedisplay ta\n");
+ goto out;
+ }
+ }
+
+ ret = psp_asd_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate asd\n");
+ goto out;
+ }
+
+ ret = psp_tmr_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate tmr\n");
+ goto out;
+ }
+
+ ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ if (ret)
+ DRM_ERROR("PSP ring stop failed\n");
+
+out:
+ return ret;
+}
+
+static int psp_resume(void *handle)
+{
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct psp_context *psp = &adev->psp;
+
+ DRM_INFO("PSP is resuming...\n");
+
+ if (psp->mem_train_ctx.enable_mem_training) {
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+ }
+
+ mutex_lock(&adev->firmware.mutex);
+
+ ret = psp_hw_start(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_non_psp_fw(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_asd_initialize(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ goto failed;
+ }
+
+ ret = psp_rl_load(adev);
+ if (ret) {
+ dev_err(adev->dev, "PSP load RL failed!\n");
+ goto failed;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+
+ ret = psp_rap_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAP: Failed to initialize RAP\n");
+
+ ret = psp_securedisplay_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "SECUREDISPLAY: Failed to initialize SECUREDISPLAY\n");
+ }
+
+ mutex_unlock(&adev->firmware.mutex);
+
+ return 0;
+
+failed:
+ DRM_ERROR("PSP resume failed\n");
+ mutex_unlock(&adev->firmware.mutex);
+ return ret;
+}
+
+int psp_gpu_reset(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_mode1_reset(&adev->psp);
+ mutex_unlock(&adev->psp.mutex);
+
+ return ret;
+}
+
+int psp_rlc_autoload_start(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index)
+{
+ unsigned int psp_write_ptr_reg = 0;
+ struct psp_gfx_rb_frame *write_frame;
+ struct psp_ring *ring = &psp->km_ring;
+ struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+ struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+ ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t ring_size_dw = ring->ring_size / 4;
+ uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+ /* KM (GPCOM) prepare write pointer */
+ psp_write_ptr_reg = psp_ring_get_wptr(psp);
+
+ /* Update KM RB frame pointer to new frame */
+ /* write_frame ptr increments by size of rb_frame in bytes */
+ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+ if ((psp_write_ptr_reg % ring_size_dw) == 0)
+ write_frame = ring_buffer_start;
+ else
+ write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+ /* Check invalid write_frame ptr address */
+ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+ DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ return -EINVAL;
+ }
+
+ /* Initialize KM RB frame */
+ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+ /* Update KM RB frame */
+ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+ write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+ write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+ write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+ write_frame->fence_value = index;
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* Update the write Pointer in DWORDs */
+ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+ psp_ring_set_wptr(psp, psp_write_ptr_reg);
+ return 0;
+}
+
+int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[PSP_FW_NAME_LEN];
+ const struct psp_firmware_header_v1_0 *asd_hdr;
+ int err = 0;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, fw_name);
+ if (err)
+ goto out;
+
+ asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+ adev->psp.asd_context.bin_desc.fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+ adev->psp.asd_context.bin_desc.feature_version = le32_to_cpu(asd_hdr->sos.fw_version);
+ adev->psp.asd_context.bin_desc.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+ adev->psp.asd_context.bin_desc.start_addr = (uint8_t *)asd_hdr +
+ le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.asd_fw);
+ return err;
+}
+
+int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[PSP_FW_NAME_LEN];
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static int parse_sos_bin_descriptor(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc,
+ const struct psp_firmware_header_v2_0 *sos_hdr)
+{
+ uint8_t *ucode_start_addr = NULL;
+
+ if (!psp || !desc || !sos_hdr)
+ return -EINVAL;
+
+ ucode_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(desc->offset_bytes) +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ psp->sos.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sos.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sos.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ psp->sys.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sys.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sys.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sys.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ psp->kdb.fw_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.feature_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->kdb.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ psp->toc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->toc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->toc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->toc.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ psp->spl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ psp->rl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->rl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ psp->soc_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->soc_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ psp->intf_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->intf_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ psp->dbg_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dbg_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_drv.start_addr = ucode_start_addr;
+ break;
+ default:
+ dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
+ break;
+ }
+
+ return 0;
+}
+
+static int psp_init_sos_base_fw(struct amdgpu_device *adev)
+{
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
+ uint8_t *ucode_array_start_addr;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2))) {
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr;
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr->sos.offset_bytes);
+ } else {
+ /* Load alternate PSP SOS FW */
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes);
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes);
+ }
+
+ if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) {
+ dev_warn(adev->dev, "PSP SOS FW not available");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[PSP_FW_NAME_LEN];
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
+ const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
+ int err = 0;
+ uint8_t *ucode_array_start_addr;
+ int fw_index = 0;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, fw_name);
+ if (err)
+ goto out;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+ amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
+
+ switch (sos_hdr->header.header_version_major) {
+ case 1:
+ err = psp_init_sos_base_fw(adev);
+ if (err)
+ goto out;
+
+ if (sos_hdr->header.header_version_minor == 1) {
+ sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes);
+ }
+ if (sos_hdr->header.header_version_minor == 2) {
+ sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes);
+ }
+ if (sos_hdr->header.header_version_minor == 3) {
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes);
+ adev->psp.toc.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes);
+ adev->psp.kdb.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes);
+ adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes);
+ adev->psp.spl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes);
+ adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes);
+ adev->psp.rl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes);
+ }
+ break;
+ case 2:
+ sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
+
+ if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
+ err = parse_sos_bin_descriptor(psp,
+ &sos_hdr_v2_0->psp_fw_bin[fw_index],
+ sos_hdr_v2_0);
+ if (err)
+ goto out;
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "unsupported psp sos firmware\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.sos_fw);
+
+ return err;
+}
+
+static int parse_ta_bin_descriptor(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc,
+ const struct ta_firmware_header_v2_0 *ta_hdr)
+{
+ uint8_t *ucode_start_addr = NULL;
+
+ if (!psp || !desc || !ta_hdr)
+ return -EINVAL;
+
+ ucode_start_addr = (uint8_t *)ta_hdr +
+ le32_to_cpu(desc->offset_bytes) +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_ASD:
+ psp->asd_context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->asd_context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_XGMI:
+ psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_RAS:
+ psp->ras_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_HDCP:
+ psp->hdcp_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->hdcp_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->hdcp_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_DTM:
+ psp->dtm_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dtm_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dtm_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_RAP:
+ psp->rap_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rap_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rap_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_SECUREDISPLAY:
+ psp->securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(desc->fw_version);
+ psp->securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(desc->size_bytes);
+ psp->securedisplay_context.context.bin_desc.start_addr =
+ ucode_start_addr;
+ break;
+ default:
+ dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type);
+ break;
+ }
+
+ return 0;
+}
+
+static int parse_ta_v1_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v1_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
+ return -EINVAL;
+
+ adev->psp.xgmi_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->xgmi.fw_version);
+ adev->psp.xgmi_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->xgmi.size_bytes);
+ adev->psp.xgmi_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ras_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->ras.fw_version);
+ adev->psp.ras_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->ras.size_bytes);
+ adev->psp.ras_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->ras.offset_bytes);
+
+ adev->psp.hdcp_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->hdcp.fw_version);
+ adev->psp.hdcp_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->hdcp.size_bytes);
+ adev->psp.hdcp_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.dtm_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->dtm.fw_version);
+ adev->psp.dtm_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->dtm.size_bytes);
+ adev->psp.dtm_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ return 0;
+}
+
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v2_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err = 0;
+ int ta_index = 0;
+
+ ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+ return -EINVAL;
+
+ if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
+ return -EINVAL;
+ }
+
+ for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) {
+ err = parse_ta_bin_descriptor(psp,
+ &ta_hdr->ta_fw_bin[ta_index],
+ ta_hdr);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
+{
+ const struct common_firmware_header *hdr;
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[PSP_FW_NAME_LEN];
+ int err;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, fw_name);
+ if (err)
+ return err;
+
+ hdr = (const struct common_firmware_header *)adev->psp.ta_fw->data;
+ switch (le16_to_cpu(hdr->header_version_major)) {
+ case 1:
+ err = parse_ta_v1_microcode(psp);
+ break;
+ case 2:
+ err = parse_ta_v2_microcode(psp);
+ break;
+ default:
+ dev_err(adev->dev, "unsupported TA header version\n");
+ err = -EINVAL;
+ }
+
+ if (err)
+ amdgpu_ucode_release(&adev->psp.ta_fw);
+
+ return err;
+}
+
+int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char fw_name[PSP_FW_NAME_LEN];
+ const struct psp_firmware_header_v1_0 *cap_hdr_v1_0;
+ struct amdgpu_firmware_info *info = NULL;
+ int err = 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "cap microcode should only be loaded under SRIOV\n");
+ return -EINVAL;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, fw_name);
+ if (err) {
+ if (err == -ENODEV) {
+ dev_warn(adev->dev, "cap microcode does not exist, skip\n");
+ err = 0;
+ goto out;
+ }
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
+ info->ucode_id = AMDGPU_UCODE_ID_CAP;
+ info->fw = adev->psp.cap_fw;
+ cap_hdr_v1_0 = (const struct psp_firmware_header_v1_0 *)
+ adev->psp.cap_fw->data;
+ adev->firmware.fw_size += ALIGN(
+ le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes), PAGE_SIZE);
+ adev->psp.cap_fw_version = le32_to_cpu(cap_hdr_v1_0->header.ucode_version);
+ adev->psp.cap_feature_version = le32_to_cpu(cap_hdr_v1_0->sos.fw_version);
+ adev->psp.cap_ucode_size = le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes);
+
+ return 0;
+
+out:
+ amdgpu_ucode_release(&adev->psp.cap_fw);
+ return err;
+}
+
+static int psp_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int psp_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t fw_ver;
+ int ret;
+
+ if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ DRM_INFO("PSP block is not ready yet.");
+ return -EBUSY;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_read_usbc_pd_fw(&adev->psp, &fw_ver);
+ mutex_unlock(&adev->psp.mutex);
+
+ if (ret) {
+ DRM_ERROR("Failed to read USBC PD FW, err = %d", ret);
+ return ret;
+ }
+
+ return sysfs_emit(buf, "%x\n", fw_ver);
+}
+
+static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret, idx;
+ char fw_name[100];
+ const struct firmware *usbc_pd_fw;
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+
+ if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ DRM_INFO("PSP block is not ready yet.");
+ return -EBUSY;
+ }
+
+ if (!drm_dev_enter(ddev, &idx))
+ return -ENODEV;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf);
+ ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev);
+ if (ret)
+ goto fail;
+
+ /* LFB address which is aligned to 1MB boundary per PSP request */
+ ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &fw_buf_bo, &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ release_firmware(usbc_pd_fw);
+fail:
+ if (ret) {
+ DRM_ERROR("Failed to load USBC PD FW, err = %d", ret);
+ count = ret;
+ }
+
+ drm_dev_exit(idx);
+ return count;
+}
+
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size)
+{
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(psp->adev), &idx))
+ return;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, start_addr, bin_size);
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * DOC: usbc_pd_fw
+ * Reading from this file will retrieve the USB-C PD firmware version. Writing to
+ * this file will trigger the update process.
+ */
+static DEVICE_ATTR(usbc_pd_fw, 0644,
+ psp_usbc_pd_fw_sysfs_read,
+ psp_usbc_pd_fw_sysfs_write);
+
+int is_psp_fw_valid(struct psp_bin_desc bin)
+{
+ return bin.size_bytes;
+}
+
+static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ adev->psp.vbflash_done = false;
+
+ /* Safeguard against memory drain */
+ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
+ dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B);
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+ return -ENOMEM;
+ }
+
+ /* TODO Just allocate max for now and optimize to realloc later if needed */
+ if (!adev->psp.vbflash_tmp_buf) {
+ adev->psp.vbflash_tmp_buf = kvmalloc(AMD_VBIOS_FILE_MAX_SIZE_B, GFP_KERNEL);
+ if (!adev->psp.vbflash_tmp_buf)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ memcpy(adev->psp.vbflash_tmp_buf + pos, buffer, count);
+ adev->psp.vbflash_image_size += count;
+ mutex_unlock(&adev->psp.mutex);
+
+ dev_dbg(adev->dev, "IFWI staged for update");
+
+ return count;
+}
+
+static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buffer,
+ loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ int ret;
+
+ if (adev->psp.vbflash_image_size == 0)
+ return -EINVAL;
+
+ dev_dbg(adev->dev, "PSP IFWI flash process initiated");
+
+ ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &fw_buf_bo,
+ &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, adev->psp.vbflash_image_size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+
+ if (ret) {
+ dev_err(adev->dev, "Failed to load IFWI, err = %d", ret);
+ return ret;
+ }
+
+ dev_dbg(adev->dev, "PSP IFWI flash process done");
+ return 0;
+}
+
+/**
+ * DOC: psp_vbflash
+ * Writing to this file will stage an IFWI for update. Reading from this file
+ * will trigger the update process.
+ */
+static struct bin_attribute psp_vbflash_bin_attr = {
+ .attr = {.name = "psp_vbflash", .mode = 0660},
+ .size = 0,
+ .write = amdgpu_psp_vbflash_write,
+ .read = amdgpu_psp_vbflash_read,
+};
+
+/**
+ * DOC: psp_vbflash_status
+ * The status of the flash process.
+ * 0: IFWI flash not complete.
+ * 1: IFWI flash complete.
+ */
+static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t vbflash_status;
+
+ vbflash_status = psp_vbflash_status(&adev->psp);
+ if (!adev->psp.vbflash_done)
+ vbflash_status = 0;
+ else if (adev->psp.vbflash_done && !(vbflash_status & 0x80000000))
+ vbflash_status = 1;
+
+ return sysfs_emit(buf, "0x%x\n", vbflash_status);
+}
+static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
+
+static struct bin_attribute *bin_flash_attrs[] = {
+ &psp_vbflash_bin_attr,
+ NULL
+};
+
+static struct attribute *flash_attrs[] = {
+ &dev_attr_psp_vbflash_status.attr,
+ &dev_attr_usbc_pd_fw.attr,
+ NULL
+};
+
+static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_usbc_pd_fw.attr)
+ return adev->psp.sup_pd_fw_up ? 0660 : 0;
+
+ return adev->psp.sup_ifwi_up ? 0440 : 0;
+}
+
+static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
+ struct bin_attribute *attr,
+ int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return adev->psp.sup_ifwi_up ? 0660 : 0;
+}
+
+const struct attribute_group amdgpu_flash_attr_group = {
+ .attrs = flash_attrs,
+ .bin_attrs = bin_flash_attrs,
+ .is_bin_visible = amdgpu_bin_flash_attr_is_visible,
+ .is_visible = amdgpu_flash_attr_is_visible,
+};
+
+const struct amd_ip_funcs psp_ip_funcs = {
+ .name = "psp",
+ .early_init = psp_early_init,
+ .late_init = NULL,
+ .sw_init = psp_sw_init,
+ .sw_fini = psp_sw_fini,
+ .hw_init = psp_hw_init,
+ .hw_fini = psp_hw_fini,
+ .suspend = psp_suspend,
+ .resume = psp_resume,
+ .is_idle = NULL,
+ .check_soft_reset = NULL,
+ .wait_for_idle = NULL,
+ .soft_reset = NULL,
+ .set_clockgating_state = psp_set_clockgating_state,
+ .set_powergating_state = psp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 8,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v13_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 4,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
new file mode 100644
index 000000000..3e67ed63e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __AMDGPU_PSP_H__
+#define __AMDGPU_PSP_H__
+
+#include "amdgpu.h"
+#include "psp_gfx_if.h"
+#include "ta_xgmi_if.h"
+#include "ta_ras_if.h"
+#include "ta_rap_if.h"
+#include "ta_secureDisplay_if.h"
+
+#define PSP_FENCE_BUFFER_SIZE 0x1000
+#define PSP_CMD_BUFFER_SIZE 0x1000
+#define PSP_1_MEG 0x100000
+#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
+#define PSP_FW_NAME_LEN 0x24
+
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+enum psp_shared_mem_size {
+ PSP_ASD_SHARED_MEM_SIZE = 0x0,
+ PSP_XGMI_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAS_SHARED_MEM_SIZE = 0x4000,
+ PSP_HDCP_SHARED_MEM_SIZE = 0x4000,
+ PSP_DTM_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAP_SHARED_MEM_SIZE = 0x4000,
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000,
+};
+
+enum ta_type_id {
+ TA_TYPE_XGMI = 1,
+ TA_TYPE_RAS,
+ TA_TYPE_HDCP,
+ TA_TYPE_DTM,
+ TA_TYPE_RAP,
+ TA_TYPE_SECUREDISPLAY,
+
+ TA_TYPE_MAX_INDEX,
+};
+
+struct psp_context;
+struct psp_xgmi_node_info;
+struct psp_xgmi_topology_info;
+struct psp_bin_desc;
+
+enum psp_bootloader_cmd {
+ PSP_BL__LOAD_SYSDRV = 0x10000,
+ PSP_BL__LOAD_SOSDRV = 0x20000,
+ PSP_BL__LOAD_KEY_DATABASE = 0x80000,
+ PSP_BL__LOAD_SOCDRV = 0xB0000,
+ PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_INTFDRV = 0xD0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__DRAM_LONG_TRAIN = 0x100000,
+ PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
+ PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+};
+
+enum psp_ring_type {
+ PSP_RING_TYPE__INVALID = 0,
+ /*
+ * These values map to the way the PSP kernel identifies the
+ * rings.
+ */
+ PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */
+ PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
+};
+
+struct psp_ring {
+ enum psp_ring_type ring_type;
+ struct psp_gfx_rb_frame *ring_mem;
+ uint64_t ring_mem_mc_addr;
+ void *ring_mem_handle;
+ uint32_t ring_size;
+ uint32_t ring_wptr;
+};
+
+/* More registers may will be supported */
+enum psp_reg_prog_id {
+ PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
+ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
+ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_LAST
+};
+
+struct psp_funcs {
+ int (*init_microcode)(struct psp_context *psp);
+ int (*wait_for_bootloader)(struct psp_context *psp);
+ int (*bootloader_load_kdb)(struct psp_context *psp);
+ int (*bootloader_load_spl)(struct psp_context *psp);
+ int (*bootloader_load_sysdrv)(struct psp_context *psp);
+ int (*bootloader_load_soc_drv)(struct psp_context *psp);
+ int (*bootloader_load_intf_drv)(struct psp_context *psp);
+ int (*bootloader_load_dbg_drv)(struct psp_context *psp);
+ int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_sos)(struct psp_context *psp);
+ int (*ring_create)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ int (*ring_stop)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ int (*ring_destroy)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ bool (*smu_reload_quirk)(struct psp_context *psp);
+ int (*mode1_reset)(struct psp_context *psp);
+ int (*mem_training)(struct psp_context *psp, uint32_t ops);
+ uint32_t (*ring_get_wptr)(struct psp_context *psp);
+ void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
+ int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
+ int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*vbflash_stat)(struct psp_context *psp);
+ int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+};
+
+struct ta_funcs {
+ int (*fn_ta_initialize)(struct psp_context *psp);
+ int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id);
+ int (*fn_ta_terminate)(struct psp_context *psp);
+};
+
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+ uint8_t num_links;
+};
+
+struct psp_xgmi_topology_info {
+ uint32_t num_nodes;
+ struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
+struct psp_bin_desc {
+ uint32_t fw_version;
+ uint32_t feature_version;
+ uint32_t size_bytes;
+ uint8_t *start_addr;
+};
+
+struct ta_mem_context {
+ struct amdgpu_bo *shared_bo;
+ uint64_t shared_mc_addr;
+ void *shared_buf;
+ enum psp_shared_mem_size shared_mem_size;
+};
+
+struct ta_context {
+ bool initialized;
+ uint32_t session_id;
+ uint32_t resp_status;
+ struct ta_mem_context mem_context;
+ struct psp_bin_desc bin_desc;
+ enum psp_gfx_cmd_id ta_load_type;
+ enum ta_type_id ta_type;
+};
+
+struct ta_cp_context {
+ struct ta_context context;
+ struct mutex mutex;
+};
+
+struct psp_xgmi_context {
+ struct ta_context context;
+ struct psp_xgmi_topology_info top_info;
+ bool supports_extended_data;
+};
+
+struct psp_ras_context {
+ struct ta_context context;
+ struct amdgpu_ras *ras;
+};
+
+#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
+#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
+#define GDDR6_MEM_TRAINING_OFFSET 0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+
+enum psp_memory_training_init_flag {
+ PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
+ PSP_MEM_TRAIN_SUPPORT = 0x1,
+ PSP_MEM_TRAIN_INIT_FAILED = 0x2,
+ PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4,
+ PSP_MEM_TRAIN_INIT_SUCCESS = 0x8,
+};
+
+enum psp_memory_training_ops {
+ PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1,
+ PSP_MEM_TRAIN_SAVE = 0x2,
+ PSP_MEM_TRAIN_RESTORE = 0x4,
+ PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8,
+ PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG,
+ PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG,
+};
+
+struct psp_memory_training_context {
+ /*training data size*/
+ u64 train_data_size;
+ /*
+ * sys_cache
+ * cpu virtual address
+ * system memory buffer that used to store the training data.
+ */
+ void *sys_cache;
+
+ /*vram offset of the p2c training data*/
+ u64 p2c_train_data_offset;
+
+ /*vram offset of the c2p training data*/
+ u64 c2p_train_data_offset;
+ struct amdgpu_bo *c2p_bo;
+
+ enum psp_memory_training_init_flag init;
+ u32 training_cnt;
+ bool enable_mem_training;
+};
+
+/** PSP runtime DB **/
+#define PSP_RUNTIME_DB_SIZE_IN_BYTES 0x10000
+#define PSP_RUNTIME_DB_OFFSET 0x100000
+#define PSP_RUNTIME_DB_COOKIE_ID 0x0ed5
+#define PSP_RUNTIME_DB_VER_1 0x0100
+#define PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT 0x40
+
+enum psp_runtime_entry_type {
+ PSP_RUNTIME_ENTRY_TYPE_INVALID = 0x0,
+ PSP_RUNTIME_ENTRY_TYPE_TEST = 0x1,
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_COMMON = 0x2, /* Common mGPU runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS = 0x6, /* SCPM validation data */
+};
+
+/* PSP runtime DB header */
+struct psp_runtime_data_header {
+ /* determine the existence of runtime db */
+ uint16_t cookie;
+ /* version of runtime db */
+ uint16_t version;
+};
+
+/* PSP runtime DB entry */
+struct psp_runtime_entry {
+ /* type of runtime db entry */
+ uint32_t entry_type;
+ /* offset of entry in bytes */
+ uint16_t offset;
+ /* size of entry in bytes */
+ uint16_t size;
+};
+
+/* PSP runtime DB directory */
+struct psp_runtime_data_directory {
+ /* number of valid entries */
+ uint16_t entry_count;
+ /* db entries*/
+ struct psp_runtime_entry entry_list[PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT];
+};
+
+/* PSP runtime DB boot config feature bitmask */
+enum psp_runtime_boot_cfg_feature {
+ BOOT_CFG_FEATURE_GECC = 0x1,
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2,
+};
+
+/* PSP run time DB SCPM authentication defines */
+enum psp_runtime_scpm_authentication {
+ SCPM_DISABLE = 0x0,
+ SCPM_ENABLE = 0x1,
+ SCPM_ENABLE_WITH_SCPM_ERR = 0x2,
+};
+
+/* PSP runtime DB boot config entry */
+struct psp_runtime_boot_cfg_entry {
+ uint32_t boot_cfg_bitmask;
+ uint32_t reserved;
+};
+
+/* PSP runtime DB SCPM entry */
+struct psp_runtime_scpm_entry {
+ enum psp_runtime_scpm_authentication scpm_status;
+};
+
+struct psp_context {
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
+ struct psp_gfx_cmd_resp *cmd;
+
+ const struct psp_funcs *funcs;
+ const struct ta_funcs *ta_funcs;
+
+ /* firmware buffer */
+ struct amdgpu_bo *fw_pri_bo;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_buf;
+
+ /* sos firmware */
+ const struct firmware *sos_fw;
+ struct psp_bin_desc sys;
+ struct psp_bin_desc sos;
+ struct psp_bin_desc toc;
+ struct psp_bin_desc kdb;
+ struct psp_bin_desc spl;
+ struct psp_bin_desc rl;
+ struct psp_bin_desc soc_drv;
+ struct psp_bin_desc intf_drv;
+ struct psp_bin_desc dbg_drv;
+ struct psp_bin_desc ras_drv;
+
+ /* tmr buffer */
+ struct amdgpu_bo *tmr_bo;
+ uint64_t tmr_mc_addr;
+
+ /* asd firmware */
+ const struct firmware *asd_fw;
+
+ /* toc firmware */
+ const struct firmware *toc_fw;
+
+ /* cap firmware */
+ const struct firmware *cap_fw;
+
+ /* fence buffer */
+ struct amdgpu_bo *fence_buf_bo;
+ uint64_t fence_buf_mc_addr;
+ void *fence_buf;
+
+ /* cmd buffer */
+ struct amdgpu_bo *cmd_buf_bo;
+ uint64_t cmd_buf_mc_addr;
+ struct psp_gfx_cmd_resp *cmd_buf_mem;
+
+ /* fence value associated with cmd buffer */
+ atomic_t fence_value;
+ /* flag to mark whether gfx fw autoload is supported or not */
+ bool autoload_supported;
+ /* flag to mark whether df cstate management centralized to PMFW */
+ bool pmfw_centralized_cstate_management;
+
+ /* xgmi ta firmware and buffer */
+ const struct firmware *ta_fw;
+ uint32_t ta_fw_version;
+
+ uint32_t cap_fw_version;
+ uint32_t cap_feature_version;
+ uint32_t cap_ucode_size;
+
+ struct ta_context asd_context;
+ struct psp_xgmi_context xgmi_context;
+ struct psp_ras_context ras_context;
+ struct ta_cp_context hdcp_context;
+ struct ta_cp_context dtm_context;
+ struct ta_cp_context rap_context;
+ struct ta_cp_context securedisplay_context;
+ struct mutex mutex;
+ struct psp_memory_training_context mem_train_ctx;
+
+ uint32_t boot_cfg_bitmask;
+
+ /* firmware upgrades supported */
+ bool sup_pd_fw_up;
+ bool sup_ifwi_up;
+
+ char *vbflash_tmp_buf;
+ size_t vbflash_image_size;
+ bool vbflash_done;
+};
+
+struct amdgpu_psp_funcs {
+ bool (*check_fw_loading_status)(struct amdgpu_device *adev,
+ enum AMDGPU_UCODE_ID);
+};
+
+
+#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
+#define psp_init_microcode(psp) \
+ ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
+#define psp_bootloader_load_kdb(psp) \
+ ((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0)
+#define psp_bootloader_load_spl(psp) \
+ ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0)
+#define psp_bootloader_load_sysdrv(psp) \
+ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
+#define psp_bootloader_load_soc_drv(psp) \
+ ((psp)->funcs->bootloader_load_soc_drv ? (psp)->funcs->bootloader_load_soc_drv((psp)) : 0)
+#define psp_bootloader_load_intf_drv(psp) \
+ ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0)
+#define psp_bootloader_load_dbg_drv(psp) \
+ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0)
+#define psp_bootloader_load_ras_drv(psp) \
+ ((psp)->funcs->bootloader_load_ras_drv ? \
+ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_sos(psp) \
+ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
+#define psp_smu_reload_quirk(psp) \
+ ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
+#define psp_mode1_reset(psp) \
+ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
+#define psp_mem_training(psp, ops) \
+ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
+
+#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
+#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
+
+#define psp_load_usbc_pd_fw(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->load_usbc_pd_fw ? \
+ (psp)->funcs->load_usbc_pd_fw((psp), (fw_pri_mc_addr)) : -EINVAL)
+
+#define psp_read_usbc_pd_fw(psp, fw_ver) \
+ ((psp)->funcs->read_usbc_pd_fw ? \
+ (psp)->funcs->read_usbc_pd_fw((psp), fw_ver) : -EINVAL)
+
+#define psp_update_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->update_spirom ? \
+ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_vbflash_status(psp) \
+ ((psp)->funcs->vbflash_stat ? \
+ (psp)->funcs->vbflash_stat((psp)) : -EINVAL)
+
+#define psp_fatal_error_recovery_quirk(psp) \
+ ((psp)->funcs->fatal_error_recovery_quirk ? \
+ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+
+extern const struct amd_ip_funcs psp_ip_funcs;
+
+extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
+extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
+extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
+
+extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, bool check_changed);
+extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
+
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode);
+
+int psp_gpu_reset(struct amdgpu_device *adev);
+
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx);
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx);
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
+int psp_ta_load(struct psp_context *psp, struct ta_context *context);
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
+
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
+int psp_xgmi_terminate(struct psp_context *psp);
+int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id);
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id);
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data);
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology);
+int psp_ras_initialize(struct psp_context *psp);
+int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_ras_enable_features(struct psp_context *psp,
+ union ta_ras_cmd_input *info, bool enable);
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
+int psp_ras_terminate(struct psp_context *psp);
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status);
+int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+
+int psp_rlc_autoload_start(struct psp_context *psp);
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value);
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index);
+int psp_init_asd_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_toc_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_sos_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_ta_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_cap_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_get_fw_attestation_records_addr(struct psp_context *psp,
+ uint64_t *output_ptr);
+
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count);
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
+
+int psp_spatial_partition(struct psp_context *psp, int mode);
+
+int is_psp_fw_valid(struct psp_bin_desc bin);
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
new file mode 100644
index 000000000..468a67b30
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+
+static uint32_t get_bin_version(const uint8_t *bin)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)bin;
+
+ return hdr->ucode_version;
+}
+
+static int prep_ta_mem_context(struct ta_mem_context *mem_context,
+ uint8_t *shared_buf,
+ uint32_t shared_buf_len)
+{
+ if (mem_context->shared_mem_size < shared_buf_len)
+ return -EINVAL;
+ memset(mem_context->shared_buf, 0, mem_context->shared_mem_size);
+ memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len);
+
+ return 0;
+}
+
+static bool is_ta_type_valid(enum ta_type_id ta_type)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct ta_funcs ras_ta_funcs = {
+ .fn_ta_initialize = psp_ras_initialize,
+ .fn_ta_invoke = psp_ras_invoke,
+ .fn_ta_terminate = psp_ras_terminate
+};
+
+static void set_ta_context_funcs(struct psp_context *psp,
+ enum ta_type_id ta_type,
+ struct ta_context **pcontext)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ *pcontext = &psp->ras_context.context;
+ psp->ta_funcs = &ras_ta_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static const struct file_operations ta_load_debugfs_fops = {
+ .write = ta_if_load_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_unload_debugfs_fops = {
+ .write = ta_if_unload_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_invoke_debugfs_fops = {
+ .write = ta_if_invoke_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+/*
+ * DOC: AMDGPU TA debugfs interfaces
+ *
+ * Three debugfs interfaces can be opened by a program to
+ * load/invoke/unload TA,
+ *
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_load
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload
+ *
+ * How to use the interfaces in a program?
+ *
+ * A program needs to provide transmit buffer to the interfaces
+ * and will receive buffer from the interfaces below,
+ *
+ * - For TA load debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA bin length (4bytes)
+ * - TA bin
+ * Receive buffer:
+ * - TA ID (4bytes)
+ *
+ * - For TA invoke debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ * - TA CMD ID (4bytes)
+ * - TA shard buf length
+ * (4bytes, value not beyond TA shared memory size)
+ * - TA shared buf
+ * Receive buffer:
+ * - TA shared buf
+ *
+ * - For TA unload debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ */
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_bin_len = 0;
+ uint8_t *ta_bin = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
+ if (!ta_bin)
+ return -ENOMEM;
+ if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
+ ret = -EFAULT;
+ goto err_free_bin;
+ }
+
+ /* Set TA context and functions */
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_bin;
+ }
+
+ /*
+ * Allocate TA shared buf in case shared buf was freed
+ * due to loading TA failed before.
+ */
+ if (!context->mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &context->mem_context);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_free_bin;
+ }
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev,
+ "Failed to unload embedded TA (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ /* Prepare TA context for TA initialization */
+ context->ta_type = ta_type;
+ context->bin_desc.fw_version = get_bin_version(ta_bin);
+ context->bin_desc.size_bytes = ta_bin_len;
+ context->bin_desc.start_addr = ta_bin;
+
+ if (!psp->ta_funcs->fn_ta_initialize) {
+ dev_err(adev->dev, "Unsupported function to initialize TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_ta_shared_buf;
+ }
+
+ ret = psp_fn_ta_initialize(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))
+ ret = -EFAULT;
+
+err_free_ta_shared_buf:
+ /* Only free TA shared buf when returns error code */
+ if (ret && context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+err_free_bin:
+ kfree(ta_bin);
+
+ return ret;
+}
+
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ set_ta_context_funcs(psp, ta_type, &context);
+ context->session_id = ta_id;
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ }
+
+ if (context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+
+ return ret;
+}
+
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t cmd_id = 0;
+ uint32_t shared_buf_len = 0;
+ uint8_t *shared_buf = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
+ if (!shared_buf)
+ return -ENOMEM;
+ if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
+ ret = -EFAULT;
+ goto err_free_shared_buf;
+ }
+
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!context->initialized) {
+ dev_err(adev->dev, "TA is not initialized\n");
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
+ dev_err(adev->dev, "Unsupported function to invoke TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_shared_buf;
+ }
+
+ context->session_id = ta_id;
+
+ ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
+ if (ret)
+ goto err_free_shared_buf;
+
+ ret = psp_fn_ta_invoke(psp, cmd_id);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret) {
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+ }
+
+ if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
+ ret = -EFAULT;
+
+err_free_shared_buf:
+ kfree(shared_buf);
+
+ return ret;
+}
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ struct dentry *dir = debugfs_create_dir("ta_if", minor->debugfs_root);
+
+ debugfs_create_file("ta_load", 0200, dir, adev,
+ &ta_load_debugfs_fops);
+
+ debugfs_create_file("ta_unload", 0200, dir,
+ adev, &ta_unload_debugfs_fops);
+
+ debugfs_create_file("ta_invoke", 0200, dir,
+ adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
new file mode 100644
index 000000000..14cd1c81c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+/* Calling set_ta_context_funcs is required before using the following macros */
+#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp)))
+#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id)))
+#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp)))
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
new file mode 100644
index 000000000..123bcf5c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_rap.h"
+
+/**
+ * DOC: AMDGPU RAP debugfs test interface
+ *
+ * how to use?
+ * echo opcode > <debugfs_dir>/dri/xxx/rap_test
+ *
+ * opcode:
+ * currently, only 2 is supported by Linux host driver,
+ * opcode 2 stands for TA_CMD_RAP__VALIDATE_L0, used to
+ * trigger L0 policy validation, you can refer more detail
+ * from header file ta_rap_if.h
+ *
+ */
+static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct ta_rap_shared_memory *rap_shared_mem;
+ struct ta_rap_cmd_output_data *rap_cmd_output;
+ struct drm_device *dev = adev_to_drm(adev);
+ uint32_t op;
+ enum ta_rap_status status;
+ int ret;
+
+ if (*pos || size != 2)
+ return -EINVAL;
+
+ ret = kstrtouint_from_user(buf, size, *pos, &op);
+ if (ret)
+ return ret;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+ }
+
+ /* make sure gfx core is on, RAP TA cann't handle
+ * GFX OFF case currently.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ switch (op) {
+ case 2:
+ ret = psp_rap_invoke(&adev->psp, op, &status);
+ if (!ret && status == TA_RAP_STATUS__SUCCESS) {
+ dev_info(adev->dev, "RAP L0 validate test success.\n");
+ } else {
+ rap_shared_mem = (struct ta_rap_shared_memory *)
+ adev->psp.rap_context.context.mem_context.shared_buf;
+ rap_cmd_output = &(rap_shared_mem->rap_out_message.output);
+
+ dev_info(adev->dev, "RAP test failed, the output is:\n");
+ dev_info(adev->dev, "\tlast_subsection: 0x%08x.\n",
+ rap_cmd_output->last_subsection);
+ dev_info(adev->dev, "\tnum_total_validate: 0x%08x.\n",
+ rap_cmd_output->num_total_validate);
+ dev_info(adev->dev, "\tnum_valid: 0x%08x.\n",
+ rap_cmd_output->num_valid);
+ dev_info(adev->dev, "\tlast_validate_addr: 0x%08x.\n",
+ rap_cmd_output->last_validate_addr);
+ dev_info(adev->dev, "\tlast_validate_val: 0x%08x.\n",
+ rap_cmd_output->last_validate_val);
+ dev_info(adev->dev, "\tlast_validate_val_exptd: 0x%08x.\n",
+ rap_cmd_output->last_validate_val_exptd);
+ }
+ break;
+ default:
+ dev_info(adev->dev, "Unsupported op id: %d, ", op);
+ dev_info(adev->dev, "Only support op 2(L0 validate test).\n");
+ break;
+ }
+
+ amdgpu_gfx_off_ctrl(adev, true);
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return size;
+}
+
+static const struct file_operations amdgpu_rap_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_rap_debugfs_write,
+ .llseek = default_llseek
+};
+
+void amdgpu_rap_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ if (!adev->psp.rap_context.context.initialized)
+ return;
+
+ debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root,
+ adev, &amdgpu_rap_debugfs_ops);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h
new file mode 100644
index 000000000..ec6d7632d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_RAP_H
+#define _AMDGPU_RAP_H
+
+#include "amdgpu.h"
+
+void amdgpu_rap_debugfs_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
new file mode 100644
index 000000000..6f6341f70
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -0,0 +1,3334 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_xgmi.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "nbio_v4_3.h"
+#include "nbio_v7_9.h"
+#include "atom.h"
+#include "amdgpu_reset.h"
+
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
+
+static bool notifier_registered;
+#endif
+static const char *RAS_FS_NAME = "ras";
+
+const char *ras_error_string[] = {
+ "none",
+ "parity",
+ "single_correctable",
+ "multi_uncorrectable",
+ "poison",
+};
+
+const char *ras_block_string[] = {
+ "umc",
+ "sdma",
+ "gfx",
+ "mmhub",
+ "athub",
+ "pcie_bif",
+ "hdp",
+ "xgmi_wafl",
+ "df",
+ "smn",
+ "sem",
+ "mp0",
+ "mp1",
+ "fuse",
+ "mca",
+ "vcn",
+ "jpeg",
+};
+
+const char *ras_mca_block_string[] = {
+ "mca_mp0",
+ "mca_mp1",
+ "mca_mpio",
+ "mca_iohc",
+};
+
+struct amdgpu_ras_block_list {
+ /* ras block link */
+ struct list_head node;
+
+ struct amdgpu_ras_block_object *ras_obj;
+};
+
+const char *get_ras_block_str(struct ras_common_if *ras_block)
+{
+ if (!ras_block)
+ return "NULL";
+
+ if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
+ return "OUT OF RANGE";
+
+ if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
+ return ras_mca_block_string[ras_block->sub_block_index];
+
+ return ras_block_string[ras_block->block];
+}
+
+#define ras_block_str(_BLOCK_) \
+ (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
+#define ras_err_str(i) (ras_error_string[ffs(i)])
+
+#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+
+/* inject address is 52 bits */
+#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
+
+/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
+#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
+
+enum amdgpu_ras_retire_page_reservation {
+ AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+ AMDGPU_RAS_RETIRE_PAGE_PENDING,
+ AMDGPU_RAS_RETIRE_PAGE_FAULT,
+};
+
+atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+ uint64_t addr);
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr);
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+ struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+ int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
+#endif
+
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ amdgpu_ras_get_context(adev)->error_query_ready = ready;
+}
+
+static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ return amdgpu_ras_get_context(adev)->error_query_ready;
+
+ return false;
+}
+
+static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
+{
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct eeprom_table_record err_rec;
+
+ if ((address >= adev->gmc.mc_vram_size) ||
+ (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ dev_warn(adev->dev,
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_check_bad_page(adev, address)) {
+ dev_warn(adev->dev,
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
+ return 0;
+ }
+
+ memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
+ err_data.err_addr = &err_rec;
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt);
+ amdgpu_ras_save_bad_pages(adev, NULL);
+ }
+
+ dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
+ dev_warn(adev->dev, "Clear EEPROM:\n");
+ dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
+
+ return 0;
+}
+
+static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+ ssize_t s;
+ char val[128];
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+ if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
+ s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
+ "ue", info.ue_count,
+ "ce", info.ce_count);
+ if (*pos >= s)
+ return 0;
+
+ s -= *pos;
+ s = min_t(u64, s, size);
+
+
+ if (copy_to_user(buf, &val[*pos], s))
+ return -EINVAL;
+
+ *pos += s;
+
+ return s;
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_read,
+ .write = NULL,
+ .llseek = default_llseek
+};
+
+static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
+ *block_id = i;
+ if (strcmp(name, ras_block_string[i]) == 0)
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
+ const char __user *buf, size_t size,
+ loff_t *pos, struct ras_debug_if *data)
+{
+ ssize_t s = min_t(u64, 64, size);
+ char str[65];
+ char block_name[33];
+ char err[9] = "ue";
+ int op = -1;
+ int block_id;
+ uint32_t sub_block;
+ u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
+
+ if (*pos)
+ return -EINVAL;
+ *pos = size;
+
+ memset(str, 0, sizeof(str));
+ memset(data, 0, sizeof(*data));
+
+ if (copy_from_user(str, buf, s))
+ return -EINVAL;
+
+ if (sscanf(str, "disable %32s", block_name) == 1)
+ op = 0;
+ else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
+ op = 1;
+ else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
+ op = 2;
+ else if (strstr(str, "retire_page") != NULL)
+ op = 3;
+ else if (str[0] && str[1] && str[2] && str[3])
+ /* ascii string, but commands are not matched. */
+ return -EINVAL;
+
+ if (op != -1) {
+ if (op == 3) {
+ if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
+ sscanf(str, "%*s %llu", &address) != 1)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+
+ return 0;
+ }
+
+ if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
+ return -EINVAL;
+
+ data->head.block = block_id;
+ /* only ue and ce errors are supported */
+ if (!memcmp("ue", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ else if (!memcmp("ce", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else
+ return -EINVAL;
+
+ data->op = op;
+
+ if (op == 2) {
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ &sub_block, &address, &value) != 3 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu",
+ &sub_block, &address, &value) != 3)
+ return -EINVAL;
+ data->head.sub_block_index = sub_block;
+ data->inject.address = address;
+ data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
+ }
+ } else {
+ if (size < sizeof(*data))
+ return -EINVAL;
+
+ if (copy_from_user(data, buf, sizeof(*data)))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
+ struct ras_debug_if *data)
+{
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t mask, inst_mask = data->inject.instance_mask;
+
+ /* no need to set instance mask if there is only one instance */
+ if (num_xcc <= 1 && inst_mask) {
+ data->inject.instance_mask = 0;
+ dev_dbg(adev->dev,
+ "RAS inject mask(0x%x) isn't supported and force it to 0.\n",
+ inst_mask);
+
+ return;
+ }
+
+ switch (data->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ mask = GENMASK(num_xcc - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__VCN:
+ case AMDGPU_RAS_BLOCK__JPEG:
+ mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+ break;
+ default:
+ mask = inst_mask;
+ break;
+ }
+
+ /* remove invalid bits in instance mask */
+ data->inject.instance_mask &= mask;
+ if (inst_mask != data->inject.instance_mask)
+ dev_dbg(adev->dev,
+ "Adjust RAS inject mask 0x%x to 0x%x\n",
+ inst_mask, data->inject.instance_mask);
+}
+
+/**
+ * DOC: AMDGPU RAS debugfs control interface
+ *
+ * The control interface accepts struct ras_debug_if which has two members.
+ *
+ * First member: ras_debug_if::head or ras_debug_if::inject.
+ *
+ * head is used to indicate which IP block will be under control.
+ *
+ * head has four members, they are block, type, sub_block_index, name.
+ * block: which IP will be under control.
+ * type: what kind of error will be enabled/disabled/injected.
+ * sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
+ * name: the name of IP.
+ *
+ * inject has three more members than head, they are address, value and mask.
+ * As their names indicate, inject operation will write the
+ * value to the address.
+ *
+ * The second member: struct ras_debug_if::op.
+ * It has three kinds of operations.
+ *
+ * - 0: disable RAS on the block. Take ::head as its data.
+ * - 1: enable RAS on the block. Take ::head as its data.
+ * - 2: inject errors on the block. Take ::inject as its data.
+ *
+ * How to use the interface?
+ *
+ * In a program
+ *
+ * Copy the struct ras_debug_if in your code and initialize it.
+ * Write the struct to the control interface.
+ *
+ * From shell
+ *
+ * .. code-block:: bash
+ *
+ * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ *
+ * Where N, is the card which you want to affect.
+ *
+ * "disable" requires only the block.
+ * "enable" requires the block and error type.
+ * "inject" requires the block, error type, address, and value.
+ *
+ * The block is one of: umc, sdma, gfx, etc.
+ * see ras_block_string[] for details
+ *
+ * The error type is one of: ue, ce, where,
+ * ue is multi-uncorrectable
+ * ce is single-correctable
+ *
+ * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
+ * The address and value are hexadecimal numbers, leading 0x is optional.
+ * The mask means instance mask, is optional, default value is 0x1.
+ *
+ * For instance,
+ *
+ * .. code-block:: bash
+ *
+ * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ *
+ * How to check the result of the operation?
+ *
+ * To check disable/enable, see "ras" features at,
+ * /sys/class/drm/card[0/1/2...]/device/ras/features
+ *
+ * To check inject, see the corresponding error count at,
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
+ *
+ * .. note::
+ * Operations are only allowed on blocks which are supported.
+ * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct ras_debug_if data;
+ int ret = 0;
+
+ if (!amdgpu_ras_get_error_query_ready(adev)) {
+ dev_warn(adev->dev, "RAS WARN: error injection "
+ "currently inaccessible\n");
+ return size;
+ }
+
+ ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
+ if (ret)
+ return ret;
+
+ if (data.op == 3) {
+ ret = amdgpu_reserve_page_direct(adev, data.inject.address);
+ if (!ret)
+ return size;
+ else
+ return ret;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, data.head.block))
+ return -EINVAL;
+
+ switch (data.op) {
+ case 0:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
+ break;
+ case 1:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
+ break;
+ case 2:
+ if ((data.inject.address >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ dev_warn(adev->dev, "RAS WARN: input address "
+ "0x%llx is invalid.",
+ data.inject.address);
+ ret = -EINVAL;
+ break;
+ }
+
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ amdgpu_ras_check_bad_page(adev, data.inject.address)) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
+ "already been marked as bad!\n",
+ data.inject.address);
+ break;
+ }
+
+ amdgpu_ras_instance_mask_check(adev, &data);
+
+ /* data.inject.address is offset instead of absolute gpu address */
+ ret = amdgpu_ras_error_inject(adev, &data.inject);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+/**
+ * DOC: AMDGPU RAS debugfs EEPROM table reset interface
+ *
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages which experiences ECC errors in vram. This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset EEPROM table to 0 entries.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev =
+ (struct amdgpu_device *)file_inode(f)->i_private;
+ int ret;
+
+ ret = amdgpu_ras_eeprom_reset_table(
+ &(amdgpu_ras_get_context(adev)->eeprom_control));
+
+ if (!ret) {
+ /* Something was written to EEPROM.
+ */
+ amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
+ return size;
+ } else {
+ return ret;
+ }
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_ctrl_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_eeprom_write,
+ .llseek = default_llseek
+};
+
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs the multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ * ue: 0
+ * ce: 1
+ *
+ */
+static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
+ return sysfs_emit(buf, "Query currently inaccessible\n");
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count);
+}
+
+/* obj begin */
+
+#define get_obj(obj) do { (obj)->use++; } while (0)
+#define alive_obj(obj) ((obj)->use)
+
+static inline void put_obj(struct ras_manager *obj)
+{
+ if (obj && (--obj->use == 0))
+ list_del(&obj->node);
+ if (obj && (obj->use < 0))
+ DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
+}
+
+/* make one obj and return it. */
+static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return NULL;
+
+ if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
+ return NULL;
+
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
+
+ /* already exist. return obj? */
+ if (alive_obj(obj))
+ return NULL;
+
+ obj->head = *head;
+ obj->adev = adev;
+ list_add(&obj->node, &con->head);
+ get_obj(obj);
+
+ return obj;
+}
+
+/* return an obj equal to head, or the first when head is NULL */
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+ int i;
+
+ if (!adev->ras_enabled || !con)
+ return NULL;
+
+ if (head) {
+ if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
+ return NULL;
+
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
+
+ if (alive_obj(obj))
+ return obj;
+ } else {
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+ obj = &con->objs[i];
+ if (alive_obj(obj))
+ return obj;
+ }
+ }
+
+ return NULL;
+}
+/* obj end */
+
+/* feature ctl begin */
+static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ return adev->ras_hw_enabled & BIT(head->block);
+}
+
+static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return con->features & BIT(head->block);
+}
+
+/*
+ * if obj is not created, then create one.
+ * set feature enable flag.
+ */
+static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, int enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ /* If hardware does not support ras, then do not create obj.
+ * But if hardware support ras, we can create the obj.
+ * Ras framework checks con->hw_supported to see if it need do
+ * corresponding initialization.
+ * IP checks con->support to see if it need disable ras.
+ */
+ if (!amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
+
+ if (enable) {
+ if (!obj) {
+ obj = amdgpu_ras_create_obj(adev, head);
+ if (!obj)
+ return -EINVAL;
+ } else {
+ /* In case we create obj somewhere else */
+ get_obj(obj);
+ }
+ con->features |= BIT(head->block);
+ } else {
+ if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
+ con->features &= ~BIT(head->block);
+ put_obj(obj);
+ }
+ }
+
+ return 0;
+}
+
+/* wrapper of psp_ras_enable_features */
+int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ union ta_ras_cmd_input *info;
+ int ret;
+
+ if (!con)
+ return -EINVAL;
+
+ /* Do not enable ras feature if it is not allowed */
+ if (enable &&
+ head->block != AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
+
+ /* Only enable gfx ras feature from host side */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_sriov_vf(adev) &&
+ !amdgpu_ras_intr_triggered()) {
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ if (!enable) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ } else {
+ info->enable_features = (struct ta_ras_enable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ }
+
+ ret = psp_ras_enable_features(&adev->psp, info, enable);
+ if (ret) {
+ dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
+ enable ? "enable":"disable",
+ get_ras_block_str(head),
+ amdgpu_ras_is_poison_mode_supported(adev), ret);
+ kfree(info);
+ return ret;
+ }
+
+ kfree(info);
+ }
+
+ /* setup the obj */
+ __amdgpu_ras_feature_enable(adev, head, enable);
+
+ return 0;
+}
+
+/* Only used in device probe stage and called only once. */
+int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ if (!con)
+ return -EINVAL;
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
+ if (enable) {
+ /* There is no harm to issue a ras TA cmd regardless of
+ * the currecnt ras state.
+ * If current state == target state, it will do nothing
+ * But sometimes it requests driver to reset and repost
+ * with error code -EAGAIN.
+ */
+ ret = amdgpu_ras_feature_enable(adev, head, 1);
+ /* With old ras TA, we might fail to enable ras.
+ * Log it and just setup the object.
+ * TODO need remove this WA in the future.
+ */
+ if (ret == -EINVAL) {
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (!ret)
+ dev_info(adev->dev,
+ "RAS INFO: %s setup object\n",
+ get_ras_block_str(head));
+ }
+ } else {
+ /* setup the object then issue a ras TA disable cmd.*/
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (ret)
+ return ret;
+
+ /* gfx block ras dsiable cmd must send to ras-ta */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features |= BIT(head->block);
+
+ ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+ /* clean gfx block ras features flag */
+ if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features &= ~BIT(head->block);
+ }
+ } else
+ ret = amdgpu_ras_feature_enable(adev, head, enable);
+
+ return ret;
+}
+
+static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
+ bool bypass)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ /* bypass psp.
+ * aka just release the obj and corresponding flags
+ */
+ if (bypass) {
+ if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
+ break;
+ }
+ }
+
+ return con->features;
+}
+
+static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
+ bool bypass)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int i;
+ const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
+
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
+ struct ras_common_if head = {
+ .block = i,
+ .type = default_ras_type,
+ .sub_block_index = 0,
+ };
+
+ if (i == AMDGPU_RAS_BLOCK__MCA)
+ continue;
+
+ if (bypass) {
+ /*
+ * bypass psp. vbios enable ras for us.
+ * so just create the obj
+ */
+ if (__amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ }
+ }
+
+ for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__MCA,
+ .type = default_ras_type,
+ .sub_block_index = i,
+ };
+
+ if (bypass) {
+ /*
+ * bypass psp. vbios enable ras for us.
+ * so just create the obj
+ */
+ if (__amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ }
+ }
+
+ return con->features;
+}
+/* feature ctl end */
+
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block)
+{
+ if (!block_obj)
+ return -EINVAL;
+
+ if (block_obj->ras_comm.block == block)
+ return 0;
+
+ return -EINVAL;
+}
+
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+
+ if (block >= AMDGPU_RAS_BLOCK__LAST)
+ return NULL;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ if (!node->ras_obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ obj = node->ras_obj;
+ if (obj->ras_block_match) {
+ if (obj->ras_block_match(obj, block, sub_block_index) == 0)
+ return obj;
+ } else {
+ if (amdgpu_ras_block_match_default(obj, block) == 0)
+ return obj;
+ }
+ }
+
+ return NULL;
+}
+
+static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ /*
+ * choosing right query method according to
+ * whether smu support query error information
+ */
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
+ } else if (!ret) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address)
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
+ }
+}
+
+/* query/inject/cure begin */
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct amdgpu_ras_block_object *block_obj = NULL;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+
+ if (!obj)
+ return -EINVAL;
+
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, &err_data);
+ } else {
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+
+ if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+ }
+ }
+
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ info->ue_count = obj->err_data.ue_count;
+ info->ce_count = obj->err_data.ce_count;
+
+ if (err_data.ce_count) {
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ dev_info(adev->dev, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block, no user "
+ "action is needed.\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ obj->err_data.ce_count,
+ get_ras_block_str(&info->head));
+ } else {
+ dev_info(adev->dev, "%ld correctable hardware errors "
+ "detected in %s block, no user "
+ "action is needed.\n",
+ obj->err_data.ce_count,
+ get_ras_block_str(&info->head));
+ }
+ }
+ if (err_data.ue_count) {
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ dev_info(adev->dev, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ obj->err_data.ue_count,
+ get_ras_block_str(&info->head));
+ } else {
+ dev_info(adev->dev, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ obj->err_data.ue_count,
+ get_ras_block_str(&info->head));
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
+ if (!amdgpu_ras_is_supported(adev, block))
+ return -EINVAL;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ ras_block_str(block));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->reset_ras_error_count)
+ block_obj->hw_ops->reset_ras_error_count(adev);
+
+ if ((block == AMDGPU_RAS_BLOCK__GFX) ||
+ (block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->reset_ras_error_status)
+ block_obj->hw_ops->reset_ras_error_status(adev);
+ }
+
+ return 0;
+}
+
+/* wrapper of psp_ras_trigger_error */
+int amdgpu_ras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ta_ras_trigger_error_input block_info = {
+ .block_id = amdgpu_ras_block_to_ta(info->head.block),
+ .inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
+ .sub_block_index = info->head.sub_block_index,
+ .address = info->address,
+ .value = info->value,
+ };
+ int ret = -EINVAL;
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ /* inject on guest isn't allowed, return success directly */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (!obj)
+ return -EINVAL;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ /* Calculate XGMI relative offset */
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ info->head.block != AMDGPU_RAS_BLOCK__GFX) {
+ block_info.address =
+ amdgpu_xgmi_get_relative_phy_addr(adev,
+ block_info.address);
+ }
+
+ if (block_obj->hw_ops->ras_error_inject) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
+ else /* Special ras_error_inject is defined (e.g: xgmi) */
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
+ } else {
+ /* default path */
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "ras inject %s failed %d\n",
+ get_ras_block_str(&info->head), ret);
+
+ return ret;
+}
+
+/**
+ * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible errors.
+ * @query_info: pointer to ras_query_if
+ *
+ * Return 0 for query success or do nothing, otherwise return an error
+ * on failures
+ */
+static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ int ret;
+
+ if (!query_info)
+ /* do nothing if query_info is not specified */
+ return 0;
+
+ ret = amdgpu_ras_query_error_status(adev, query_info);
+ if (ret)
+ return ret;
+
+ *ce_count += query_info->ce_count;
+ *ue_count += query_info->ue_count;
+
+ /* some hardware/IP supports read to clear
+ * no need to explictly reset the err status after the query call */
+ if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
+ dev_warn(adev->dev,
+ "Failed to reset error counter and error status\n");
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible
+ * errors.
+ * @query_info: pointer to ras_query_if if the query request is only for
+ * specific ip block; if info is NULL, then the qurey request is for
+ * all the ip blocks that support query ras error counters/status
+ *
+ * If set, @ce_count or @ue_count, count and return the corresponding
+ * error counts in those integer pointers. Return 0 if the device
+ * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
+ */
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+ unsigned long ce, ue;
+ int ret;
+
+ if (!adev->ras_enabled || !con)
+ return -EOPNOTSUPP;
+
+ /* Don't count since no reporting.
+ */
+ if (!ce_count && !ue_count)
+ return 0;
+
+ ce = 0;
+ ue = 0;
+ if (!query_info) {
+ /* query all the ip blocks that support ras query interface */
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
+ }
+ } else {
+ /* query specific ip block */
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
+ }
+
+ if (ret)
+ return ret;
+
+ if (ce_count)
+ *ce_count = ce;
+
+ if (ue_count)
+ *ue_count = ue;
+
+ return 0;
+}
+/* query/inject/cure end */
+
+
+/* sysfs begin */
+
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+ switch (flags) {
+ case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
+ return "R";
+ case AMDGPU_RAS_RETIRE_PAGE_PENDING:
+ return "P";
+ case AMDGPU_RAS_RETIRE_PAGE_FAULT:
+ default:
+ return "F";
+ }
+}
+
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
+ *
+ * It allows user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of below character,
+ *
+ * R: reserved, this gpu page is reserved and not able to use.
+ *
+ * P: pending for reserve, this gpu page is marked as bad, will be reserved
+ * in next window of page_reserve.
+ *
+ * F: unable to reserve. this gpu page can't be reserved due to some reasons.
+ *
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ *
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+ struct kobject *kobj, struct bin_attribute *attr,
+ char *buf, loff_t ppos, size_t count)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, badpages_attr);
+ struct amdgpu_device *adev = con->adev;
+ const unsigned int element_size =
+ sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+ unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+ unsigned int end = div64_ul(ppos + count - 1, element_size);
+ ssize_t s = 0;
+ struct ras_badpage *bps = NULL;
+ unsigned int bps_count = 0;
+
+ memset(buf, 0, count);
+
+ if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ return 0;
+
+ for (; start < end && start < bps_count; start++)
+ s += scnprintf(&buf[s], element_size + 1,
+ "0x%08x : 0x%08x : %1s\n",
+ bps[start].bp,
+ bps[start].size,
+ amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+ kfree(bps);
+
+ return s;
+}
+
+static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, features_attr);
+
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
+}
+
+static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &con->badpages_attr.attr,
+ RAS_FS_NAME);
+}
+
+static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct attribute *attrs[] = {
+ &con->features_attr.attr,
+ NULL
+ };
+ struct attribute_group group = {
+ .name = RAS_FS_NAME,
+ .attrs = attrs,
+ };
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_group(&adev->dev->kobj, &group);
+
+ return 0;
+}
+
+int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (!obj || obj->attr_inuse)
+ return -EINVAL;
+
+ get_obj(obj);
+
+ snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+ "%s_err_count", head->name);
+
+ obj->sysfs_attr = (struct device_attribute){
+ .attr = {
+ .name = obj->fs_data.sysfs_name,
+ .mode = S_IRUGO,
+ },
+ .show = amdgpu_ras_sysfs_read,
+ };
+ sysfs_attr_init(&obj->sysfs_attr.attr);
+
+ if (sysfs_add_file_to_group(&adev->dev->kobj,
+ &obj->sysfs_attr.attr,
+ RAS_FS_NAME)) {
+ put_obj(obj);
+ return -EINVAL;
+ }
+
+ obj->attr_inuse = 1;
+
+ return 0;
+}
+
+int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (!obj || !obj->attr_inuse)
+ return -EINVAL;
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &obj->sysfs_attr.attr,
+ RAS_FS_NAME);
+ obj->attr_inuse = 0;
+ put_obj(obj);
+
+ return 0;
+}
+
+static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ amdgpu_ras_sysfs_remove(adev, &obj->head);
+ }
+
+ if (amdgpu_bad_page_threshold != 0)
+ amdgpu_ras_sysfs_remove_bad_page_node(adev);
+
+ amdgpu_ras_sysfs_remove_feature_node(adev);
+
+ return 0;
+}
+/* sysfs end */
+
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
+/* debugfs begin */
+static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *dir;
+
+ dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
+ debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_ops);
+ debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
+ &con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
+ debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+ debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
+ debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_size_ops);
+ con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
+ S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_table_ops);
+ amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);
+
+ /*
+ * After one uncorrectable error happens, usually GPU recovery will
+ * be scheduled. But due to the known problem in GPU recovery failing
+ * to bring GPU back, below interface provides one direct way to
+ * user to reboot system automatically in such case within
+ * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
+ * will never be called.
+ */
+ debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);
+
+ /*
+ * User could set this not to clean up hardware's error count register
+ * of RAS IPs during ras recovery.
+ */
+ debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
+ &con->disable_ras_err_cnt_harvest);
+ return dir;
+}
+
+static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+ struct ras_fs_if *head,
+ struct dentry *dir)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+
+ if (!obj || !dir)
+ return;
+
+ get_obj(obj);
+
+ memcpy(obj->fs_data.debugfs_name,
+ head->debugfs_name,
+ sizeof(obj->fs_data.debugfs_name));
+
+ debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
+ obj, &amdgpu_ras_debugfs_ops);
+}
+
+void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct dentry *dir;
+ struct ras_manager *obj;
+ struct ras_fs_if fs_info;
+
+ /*
+ * it won't be called in resume path, no need to check
+ * suspend and gpu reset status
+ */
+ if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
+ return;
+
+ dir = amdgpu_ras_debugfs_create_ctrl_node(adev);
+
+ list_for_each_entry(obj, &con->head, node) {
+ if (amdgpu_ras_is_supported(adev, obj->head.block) &&
+ (obj->attr_inuse == 1)) {
+ sprintf(fs_info.debugfs_name, "%s_err_inject",
+ get_ras_block_str(&obj->head));
+ fs_info.head = obj->head;
+ amdgpu_ras_debugfs_create(adev, &fs_info, dir);
+ }
+ }
+}
+
+/* debugfs end */
+
+/* ras fs */
+static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static DEVICE_ATTR(features, S_IRUGO,
+ amdgpu_ras_sysfs_features_read, NULL);
+static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct attribute_group group = {
+ .name = RAS_FS_NAME,
+ };
+ struct attribute *attrs[] = {
+ &con->features_attr.attr,
+ NULL
+ };
+ struct bin_attribute *bin_attrs[] = {
+ NULL,
+ NULL,
+ };
+ int r;
+
+ /* add features entry */
+ con->features_attr = dev_attr_features;
+ group.attrs = attrs;
+ sysfs_attr_init(attrs[0]);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ /* add bad_page_features entry */
+ bin_attr_gpu_vram_bad_pages.private = NULL;
+ con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ bin_attrs[0] = &con->badpages_attr;
+ group.bin_attrs = bin_attrs;
+ sysfs_bin_attr_init(bin_attrs[0]);
+ }
+
+ r = sysfs_create_group(&adev->dev->kobj, &group);
+ if (r)
+ dev_err(adev->dev, "Failed to create RAS sysfs group!");
+
+ return 0;
+}
+
+static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *con_obj, *ip_obj, *tmp;
+
+ if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+ list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
+ ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
+ if (ip_obj)
+ put_obj(ip_obj);
+ }
+ }
+
+ amdgpu_ras_sysfs_remove_all(adev);
+ return 0;
+}
+/* ras fs end */
+
+/* ih begin */
+
+/* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ /* Fatal error events are handled on host side */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ bool poison_stat = false;
+ struct amdgpu_device *adev = obj->adev;
+ struct amdgpu_ras_block_object *block_obj =
+ amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+
+ if (!block_obj)
+ return;
+
+ /* both query_poison_status and handle_poison_consumption are optional,
+ * but at least one of them should be implemented if we need poison
+ * consumption handler
+ */
+ if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat) {
+ /* Not poison consumption interrupt, no need to handle it */
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+
+ return;
+ }
+ }
+
+ amdgpu_umc_poison_handler(adev, false);
+
+ if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+ /* gpu reset is fallback for failed and default cases */
+ if (poison_stat) {
+ dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
+ amdgpu_ras_reset_gpu(adev);
+ } else {
+ amdgpu_gfx_poison_consumption_handler(adev, entry);
+ }
+}
+
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_info(obj->adev->dev,
+ "Poison is created, no user action is needed.\n");
+}
+
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ int ret;
+
+ if (!data->cb)
+ return;
+
+ /* Let IP handle its data, maybe we need get the output
+ * from the callback to update the error type/count, etc
+ */
+ ret = data->cb(obj->adev, &err_data, entry);
+ /* ue will trigger an interrupt, and in that case
+ * we need do a reset to recovery the whole system.
+ * But leave IP do that recovery, here we just dispatch
+ * the error.
+ */
+ if (ret == AMDGPU_RAS_SUCCESS) {
+ /* these counts could be left as 0 if
+ * some blocks do not count error number
+ */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
+}
+
+static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct amdgpu_iv_entry entry;
+
+ while (data->rptr != data->wptr) {
+ rmb();
+ memcpy(&entry, &data->ring[data->rptr],
+ data->element_size);
+
+ wmb();
+ data->rptr = (data->aligned_element_size +
+ data->rptr) % data->ring_size;
+
+ if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
+ else
+ amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
+ } else {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_umc_handler(obj, &entry);
+ else
+ dev_warn(obj->adev->dev,
+ "No RAS interrupt handler for non-UMC block with poison disabled.\n");
+ }
+ }
+}
+
+static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
+{
+ struct ras_ih_data *data =
+ container_of(work, struct ras_ih_data, ih_work);
+ struct ras_manager *obj =
+ container_of(data, struct ras_manager, ih_data);
+
+ amdgpu_ras_interrupt_handler(obj);
+}
+
+int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
+ struct ras_dispatch_if *info)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_ih_data *data = &obj->ih_data;
+
+ if (!obj)
+ return -EINVAL;
+
+ if (data->inuse == 0)
+ return 0;
+
+ /* Might be overflow... */
+ memcpy(&data->ring[data->wptr], info->entry,
+ data->element_size);
+
+ wmb();
+ data->wptr = (data->aligned_element_size +
+ data->wptr) % data->ring_size;
+
+ schedule_work(&data->ih_work);
+
+ return 0;
+}
+
+int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ struct ras_ih_data *data;
+
+ if (!obj)
+ return -EINVAL;
+
+ data = &obj->ih_data;
+ if (data->inuse == 0)
+ return 0;
+
+ cancel_work_sync(&data->ih_work);
+
+ kfree(data->ring);
+ memset(data, 0, sizeof(*data));
+ put_obj(obj);
+
+ return 0;
+}
+
+int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ struct ras_ih_data *data;
+ struct amdgpu_ras_block_object *ras_obj;
+
+ if (!obj) {
+ /* in case we registe the IH before enable ras feature */
+ obj = amdgpu_ras_create_obj(adev, head);
+ if (!obj)
+ return -EINVAL;
+ } else
+ get_obj(obj);
+
+ ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
+
+ data = &obj->ih_data;
+ /* add the callback.etc */
+ *data = (struct ras_ih_data) {
+ .inuse = 0,
+ .cb = ras_obj->ras_cb,
+ .element_size = sizeof(struct amdgpu_iv_entry),
+ .rptr = 0,
+ .wptr = 0,
+ };
+
+ INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
+
+ data->aligned_element_size = ALIGN(data->element_size, 8);
+ /* the ring can store 64 iv entries. */
+ data->ring_size = 64 * data->aligned_element_size;
+ data->ring = kmalloc(data->ring_size, GFP_KERNEL);
+ if (!data->ring) {
+ put_obj(obj);
+ return -ENOMEM;
+ }
+
+ /* IH is ready */
+ data->inuse = 1;
+
+ return 0;
+}
+
+static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
+ }
+
+ return 0;
+}
+/* ih end */
+
+/* traversal all IPs except NBIO to query error counter */
+static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ /*
+ * PCIE_BIF IP has one different isr by ras controller
+ * interrupt, the specific ras counter query will be
+ * done in that isr. So skip such block from common
+ * sync flood interrupt isr calling.
+ */
+ if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
+ continue;
+
+ /*
+ * this is a workaround for aldebaran, skip send msg to
+ * smu to get ecc_info table due to smu handle get ecc
+ * info table failed temporarily.
+ * should be removed until smu fix handle ecc_info table.
+ */
+ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))
+ continue;
+
+ amdgpu_ras_query_error_status(adev, &info);
+
+ if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
+ }
+}
+
+/* Parse RdRspStatus and WrRspStatus */
+static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct amdgpu_ras_block_object *block_obj;
+ /*
+ * Only two block need to query read/write
+ * RspStatus at current state
+ */
+ if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
+ (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+
+}
+
+static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ amdgpu_ras_error_status_query(adev, &info);
+ }
+}
+
+/* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int i = 0;
+ int ret = 0, status;
+
+ if (!con || !con->eh_data || !bps || !count)
+ return -EINVAL;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data || data->count == 0) {
+ *bps = NULL;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+ if (!*bps) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (; i < data->count; i++) {
+ (*bps)[i] = (struct ras_badpage){
+ .bp = data->bps[i].retired_page,
+ .size = AMDGPU_GPU_PAGE_SIZE,
+ .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+ };
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ data->bps[i].retired_page);
+ if (status == -EBUSY)
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ }
+
+ *count = data->count;
+out:
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
+static void amdgpu_ras_do_recovery(struct work_struct *work)
+{
+ struct amdgpu_ras *ras =
+ container_of(work, struct amdgpu_ras, recovery_work);
+ struct amdgpu_device *remote_adev = NULL;
+ struct amdgpu_device *adev = ras->adev;
+ struct list_head device_list, *device_list_handle = NULL;
+
+ if (!ras->disable_ras_err_cnt_harvest) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ /* Build list of devices to query RAS related errors */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
+ device_list_handle = &hive->device_list;
+ } else {
+ INIT_LIST_HEAD(&device_list);
+ list_add_tail(&adev->gmc.xgmi.head, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ list_for_each_entry(remote_adev,
+ device_list_handle, gmc.xgmi.head) {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev);
+ }
+
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (amdgpu_device_should_recover_gpu(ras->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ else {
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ psp_fatal_error_recovery_quirk(&adev->psp);
+ }
+ }
+
+ amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+ }
+ atomic_set(&ras->in_recovery, 0);
+}
+
+/* alloc/realloc bps array */
+static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
+ struct ras_err_handler_data *data, int pages)
+{
+ unsigned int old_space = data->count + data->space_left;
+ unsigned int new_space = old_space + pages;
+ unsigned int align_space = ALIGN(new_space, 512);
+ void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+
+ if (!bps) {
+ return -ENOMEM;
+ }
+
+ if (data->bps) {
+ memcpy(bps, data->bps,
+ data->count * sizeof(*data->bps));
+ kfree(data->bps);
+ }
+
+ data->bps = bps;
+ data->space_left += align_space - old_space;
+ return 0;
+}
+
+/* it deal with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int ret = 0;
+ uint32_t i;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data)
+ goto out;
+
+ for (i = 0; i < pages; i++) {
+ if (amdgpu_ras_check_bad_page_unlock(con,
+ bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ continue;
+
+ if (!data->space_left &&
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+ bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE);
+
+ memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+ data->count++;
+ data->space_left--;
+ }
+out:
+ mutex_unlock(&con->recovery_lock);
+
+ return ret;
+}
+
+/*
+ * write error record array to eeprom, the function should be
+ * protected by recovery_lock
+ * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
+ */
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ struct amdgpu_ras_eeprom_control *control;
+ int save_count;
+
+ if (!con || !con->eh_data) {
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
+ mutex_lock(&con->recovery_lock);
+ control = &con->eeprom_control;
+ data = con->eh_data;
+ save_count = data->count - control->ras_num_recs;
+ mutex_unlock(&con->recovery_lock);
+
+ if (new_cnt)
+ *new_cnt = save_count / adev->umc.retire_unit;
+
+ /* only new entries are saved */
+ if (save_count > 0) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[control->ras_num_recs],
+ save_count)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+
+ dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ }
+
+ return 0;
+}
+
+/*
+ * read error record array in eeprom and reserve enough space for
+ * storing new bad pages
+ */
+static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ struct eeprom_table_record *bps;
+ int ret;
+
+ /* no bad page record, skip eeprom access */
+ if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
+ return 0;
+
+ bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return -ENOMEM;
+
+ ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
+ if (ret)
+ dev_err(adev->dev, "Failed to load EEPROM table records!");
+ else
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
+
+ kfree(bps);
+ return ret;
+}
+
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+ uint64_t addr)
+{
+ struct ras_err_handler_data *data = con->eh_data;
+ int i;
+
+ addr >>= AMDGPU_GPU_PAGE_SHIFT;
+ for (i = 0; i < data->count; i++)
+ if (addr == data->bps[i].retired_page)
+ return true;
+
+ return false;
+}
+
+/*
+ * check if an address belongs to bad page
+ *
+ * Note: this check is only for umc block
+ */
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool ret = false;
+
+ if (!con || !con->eh_data)
+ return ret;
+
+ mutex_lock(&con->recovery_lock);
+ ret = amdgpu_ras_check_bad_page_unlock(con, addr);
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+ uint32_t max_count)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ /*
+ * Justification of value bad_page_cnt_threshold in ras structure
+ *
+ * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
+ * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
+ * scenarios accordingly.
+ *
+ * Bad page retirement enablement:
+ * - If amdgpu_bad_page_threshold = -2,
+ * bad_page_cnt_threshold = typical value by formula.
+ *
+ * - When the value from user is 0 < amdgpu_bad_page_threshold <
+ * max record length in eeprom, use it directly.
+ *
+ * Bad page retirement disablement:
+ * - If amdgpu_bad_page_threshold = 0, bad page retirement
+ * functionality is disabled, and bad_page_cnt_threshold will
+ * take no effect.
+ */
+
+ if (amdgpu_bad_page_threshold < 0) {
+ u64 val = adev->gmc.mc_vram_size;
+
+ do_div(val, RAS_BAD_PAGE_COVER);
+ con->bad_page_cnt_threshold = min(lower_32_bits(val),
+ max_count);
+ } else {
+ con->bad_page_cnt_threshold = min_t(int, max_count,
+ amdgpu_bad_page_threshold);
+ }
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data **data;
+ u32 max_eeprom_records_count = 0;
+ bool exc_err_limit = false;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Allow access to RAS EEPROM via debugfs, when the ASIC
+ * supports RAS and debugfs is enabled, but when
+ * adev->ras_enabled is unset, i.e. when "ras_enable"
+ * module parameter is set to 0.
+ */
+ con->adev = adev;
+
+ if (!adev->ras_enabled)
+ return 0;
+
+ data = &con->eh_data;
+ *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ if (!*data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_init(&con->recovery_lock);
+ INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
+ atomic_set(&con->in_recovery, 0);
+ con->eeprom_control.bad_channel_bitmap = 0;
+
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
+
+ /* Todo: During test the SMU might fail to read the eeprom through I2C
+ * when the GPU is pending on XGMI reset during probe time
+ * (Mostly after second bus reset), skip it now
+ */
+ if (adev->gmc.xgmi.pending_reset)
+ return 0;
+ ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
+ /*
+ * This calling fails when exc_err_limit is true or
+ * ret != 0.
+ */
+ if (exc_err_limit || ret)
+ goto free;
+
+ if (con->eeprom_control.ras_num_recs) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret)
+ goto free;
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+ }
+
+#ifdef CONFIG_X86_MCE_AMD
+ if ((adev->asic_type == CHIP_ALDEBARAN) &&
+ (adev->gmc.xgmi.connected_to_cpu))
+ amdgpu_register_bad_pages_mca_notifier(adev);
+#endif
+ return 0;
+
+free:
+ kfree((*data)->bps);
+ kfree(*data);
+ con->eh_data = NULL;
+out:
+ dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
+
+ /*
+ * Except error threshold exceeding case, other failure cases in this
+ * function would not fail amdgpu driver init.
+ */
+ if (!exc_err_limit)
+ ret = 0;
+ else
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+
+ /* recovery_init failed to init it, fini is useless */
+ if (!data)
+ return 0;
+
+ cancel_work_sync(&con->recovery_work);
+
+ mutex_lock(&con->recovery_lock);
+ con->eh_data = NULL;
+ kfree(data->bps);
+ kfree(data);
+ mutex_unlock(&con->recovery_lock);
+
+ return 0;
+}
+/* recovery end */
+
+static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (adev->asic_type == CHIP_IP_DISCOVERY) {
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return adev->asic_type == CHIP_VEGA10 ||
+ adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS ||
+ adev->asic_type == CHIP_ALDEBARAN ||
+ adev->asic_type == CHIP_SIENNA_CICHLID;
+}
+
+/*
+ * this is workaround for vega20 workstation sku,
+ * force enable gfx ras, ignore vbios gfx ras flag
+ * due to GC EDC can not write
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (!ctx)
+ return;
+
+ if (strnstr(ctx->vbios_pn, "D16406",
+ sizeof(ctx->vbios_pn)) ||
+ strnstr(ctx->vbios_pn, "D36002",
+ sizeof(ctx->vbios_pn)))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
+/*
+ * check hardware's ras ability which will be saved in hw_supported.
+ * if hardware does not support ras, we can skip some ras initializtion and
+ * forbid some ras operations from IP.
+ * if software itself, say boot parameter, limit the ras ability. We still
+ * need allow IP do some limited operations, like disable. In such case,
+ * we have to initialize ras as normal. but need check if operation is
+ * allowed or not in each function.
+ */
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
+{
+ adev->ras_hw_enabled = adev->ras_enabled = 0;
+
+ if (!amdgpu_ras_asic_supported(adev))
+ return;
+
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+ dev_info(adev->dev, "MEM ECC is active.\n");
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else {
+ dev_info(adev->dev, "MEM ECC is not presented.\n");
+ }
+
+ if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+ dev_info(adev->dev, "SRAM ECC is active.\n");
+ if (!amdgpu_sriov_vf(adev))
+ adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ else
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__GFX);
+
+ /* VCN/JPEG RAS can be supported on both bare metal and
+ * SRIOV environment
+ */
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+ adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+
+ /*
+ * XGMI RAS is not supported if xgmi num physical nodes
+ * is zero
+ */
+ if (!adev->gmc.xgmi.num_physical_nodes)
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ } else {
+ dev_info(adev->dev, "SRAM ECC is not presented.\n");
+ }
+ } else {
+ /* driver only manages a few IP blocks RAS feature
+ * when GPU is connected cpu through XGMI */
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_get_quirks(adev);
+
+ /* hw_supported needs to be aligned with RAS block mask. */
+ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
+
+
+ /*
+ * Disable ras feature for aqua vanjaram
+ * by default on apu platform.
+ */
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6) &&
+ adev->gmc.is_app_apu)
+ adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
+ else
+ adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
+}
+
+static void amdgpu_ras_counte_dw(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ ras_counte_delay_work.work);
+ struct amdgpu_device *adev = con->adev;
+ struct drm_device *dev = adev_to_drm(adev);
+ unsigned long ce_count, ue_count;
+ int res;
+
+ res = pm_runtime_get_sync(dev->dev);
+ if (res < 0)
+ goto Out;
+
+ /* Cache new values.
+ */
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+ pm_runtime_mark_last_busy(dev->dev);
+Out:
+ pm_runtime_put_autosuspend(dev->dev);
+}
+
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only poison is set in both DF and UMC, we can support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
+}
+
+int amdgpu_ras_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int r;
+
+ if (con)
+ return 0;
+
+ con = kmalloc(sizeof(struct amdgpu_ras) +
+ sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
+ sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
+ GFP_KERNEL|__GFP_ZERO);
+ if (!con)
+ return -ENOMEM;
+
+ con->adev = adev;
+ INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
+ atomic_set(&con->ras_ce_count, 0);
+ atomic_set(&con->ras_ue_count, 0);
+
+ con->objs = (struct ras_manager *)(con + 1);
+
+ amdgpu_ras_set_context(adev, con);
+
+ amdgpu_ras_check_supported(adev);
+
+ if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
+ /* set gfx block ras context feature for VEGA20 Gaming
+ * send ras disable cmd to ras ta during ras late init.
+ */
+ if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
+ con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
+
+ return 0;
+ }
+
+ r = 0;
+ goto release_con;
+ }
+
+ con->update_channel_flag = false;
+ con->features = 0;
+ INIT_LIST_HEAD(&con->head);
+ /* Might need get this flag from vbios. */
+ con->flags = RAS_DEFAULT_FLAGS;
+
+ /* initialize nbio ras function ahead of any other
+ * ras functions so hardware fatal error interrupt
+ * can be enabled as early as possible */
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->nbio.ras = &nbio_v7_4_ras;
+ break;
+ case IP_VERSION(4, 3, 0):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbio v4_3 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS */
+ adev->nbio.ras = &nbio_v4_3_ras;
+ break;
+ case IP_VERSION(7, 9, 0):
+ if (!adev->gmc.is_app_apu)
+ adev->nbio.ras = &nbio_v7_9_ras;
+ break;
+ default:
+ /* nbio ras is not available */
+ break;
+ }
+
+ /* nbio ras block needs to be enabled ahead of other ras blocks
+ * to handle fatal error */
+ r = amdgpu_nbio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt) {
+ r = adev->nbio.ras->init_ras_controller_interrupt(adev);
+ if (r)
+ goto release_con;
+ }
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
+ if (r)
+ goto release_con;
+ }
+
+ amdgpu_ras_query_poison_mode(adev);
+
+ if (amdgpu_ras_fs_init(adev)) {
+ r = -EINVAL;
+ goto release_con;
+ }
+
+ dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
+ "hardware ability[%x] ras_mask[%x]\n",
+ adev->ras_hw_enabled, adev->ras_enabled);
+
+ return 0;
+release_con:
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+
+ return r;
+}
+
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
+{
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu)
+ return 1;
+ return 0;
+}
+
+static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct ras_query_if info = {
+ .head = *ras_block,
+ };
+
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ return 0;
+
+ if (amdgpu_ras_query_error_status(adev, &info) != 0)
+ DRM_WARN("RAS init harvest failure");
+
+ if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
+ DRM_WARN("RAS init harvest reset failure");
+
+ return 0;
+}
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return false;
+
+ return con->poison_supported;
+}
+
+/* helper function to handle common stuff in ip late init phase */
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct amdgpu_ras_block_object *ras_obj = NULL;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_query_if *query_info;
+ unsigned long ue_count, ce_count;
+ int r;
+
+ /* disable RAS feature per IP block if it is not supported */
+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ return 0;
+ }
+
+ r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
+ if (r) {
+ if (adev->in_suspend || amdgpu_in_reset(adev)) {
+ /* in resume phase, if fail to enable ras,
+ * clean up all ras fs nodes, and disable ras */
+ goto cleanup;
+ } else
+ return r;
+ }
+
+ /* check for errors on warm reset edc persisant supported ASIC */
+ amdgpu_persistent_edc_harvesting(adev, ras_block);
+
+ /* in resume phase, no need to create ras fs node */
+ if (adev->in_suspend || amdgpu_in_reset(adev))
+ return 0;
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_poison_status ||
+ ras_obj->hw_ops->handle_poison_consumption))) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
+ if (r)
+ goto cleanup;
+ }
+
+ if (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_ras_error_count ||
+ ras_obj->hw_ops->query_ras_error_status)) {
+ r = amdgpu_ras_sysfs_create(adev, ras_block);
+ if (r)
+ goto interrupt;
+
+ /* Those are the cached values at init.
+ */
+ query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+ if (!query_info)
+ return -ENOMEM;
+ memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+ kfree(query_info);
+ }
+
+ return 0;
+
+interrupt:
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+cleanup:
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+ return r;
+}
+
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
+/* helper function to remove ras fs node and interrupt handler */
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct amdgpu_ras_block_object *ras_obj;
+ if (!ras_block)
+ return;
+
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+}
+
+static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_fini(adev, ras_block);
+}
+
+/* do some init work after IP late init as dependence.
+ * and it runs in resume/gpu reset/booting up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ if (!adev->ras_enabled || !con) {
+ /* clean ras context for VEGA20 Gaming after send ras disable cmd */
+ amdgpu_release_ras_context(adev);
+
+ return;
+ }
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
+ /* Set up all other IPs which are not implemented. There is a
+ * tricky thing that IP's actual ras error type should be
+ * MULTI_UNCORRECTABLE, but as driver does not handle it, so
+ * ERROR_NONE make sense anyway.
+ */
+ amdgpu_ras_enable_all_features(adev, 1);
+
+ /* We enable ras on all hw_supported block, but as boot
+ * parameter might disable some of them and one or more IP has
+ * not implemented yet. So we disable them on behalf.
+ */
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
+ amdgpu_ras_feature_enable(adev, &obj->head, 0);
+ /* there should be no any reference. */
+ WARN_ON(alive_obj(obj));
+ }
+ }
+ }
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ amdgpu_ras_disable_all_features(adev, 0);
+ /* Make sure all ras objects are disabled. */
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 1);
+}
+
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+ int r;
+
+ /* Guest side doesn't need init ras feature */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ if (!node->ras_obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ obj = node->ras_obj;
+ if (obj->ras_late_init) {
+ r = obj->ras_late_init(adev, &obj->ras_comm);
+ if (r) {
+ dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
+ obj->ras_comm.name, r);
+ return r;
+ }
+ } else
+ amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ }
+
+ return 0;
+}
+
+/* do some fini work before IP fini as dependence */
+int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return 0;
+
+
+ /* Need disable ras on all IPs here before ip [hw/sw]fini */
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 0);
+ amdgpu_ras_recovery_fini(adev);
+ return 0;
+}
+
+int amdgpu_ras_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *ras_node, *tmp;
+ struct amdgpu_ras_block_object *obj = NULL;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return 0;
+
+ list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+ if (ras_node->ras_obj) {
+ obj = ras_node->ras_obj;
+ if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
+ obj->ras_fini)
+ obj->ras_fini(adev, &obj->ras_comm);
+ else
+ amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+ }
+
+ /* Clear ras blocks from ras_list and free ras block list node */
+ list_del(&ras_node->node);
+ kfree(ras_node);
+ }
+
+ amdgpu_ras_fs_fini(adev);
+ amdgpu_ras_interrupt_remove_all(adev);
+
+ WARN(con->features, "Feature mask is not cleared");
+
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 1);
+
+ cancel_delayed_work_sync(&con->ras_counte_delay_work);
+
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+
+ return 0;
+}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
+ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ dev_info(adev->dev, "uncorrectable hardware error"
+ "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_VEGA20 &&
+ adev->pm.fw_version <= 0x283400) {
+ return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
+ amdgpu_ras_intr_triggered();
+ }
+
+ return false;
+}
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return;
+
+ if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+ con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+ }
+}
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+ int i;
+ struct amdgpu_device *adev = NULL;
+
+ for (i = 0; i < mce_adev_list.num_gpu; i++) {
+ adev = mce_adev_list.devs[i];
+
+ if (adev && adev->gmc.xgmi.connected_to_cpu &&
+ adev->gmc.xgmi.physical_node_id == node_id)
+ break;
+ adev = NULL;
+ }
+
+ return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m) (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m) (((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m) ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET 8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct mce *m = (struct mce *)data;
+ struct amdgpu_device *adev = NULL;
+ uint32_t gpu_id = 0;
+ uint32_t umc_inst = 0, ch_inst = 0;
+
+ /*
+ * If the error was generated in UMC_V2, which belongs to GPU UMCs,
+ * and error occurred in DramECC (Extended error code = 0) then only
+ * process the error, else bail out.
+ */
+ if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
+ (XEC(m->status, 0x3f) == 0x0)))
+ return NOTIFY_DONE;
+
+ /*
+ * If it is correctable error, return.
+ */
+ if (mce_is_correctable(m))
+ return NOTIFY_OK;
+
+ /*
+ * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+ */
+ gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+ adev = find_adev(gpu_id);
+ if (!adev) {
+ DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+ gpu_id);
+ return NOTIFY_DONE;
+ }
+
+ /*
+ * If it is uncorrectable error, then find out UMC instance and
+ * channel index.
+ */
+ umc_inst = GET_UMC_INST(m->ipid);
+ ch_inst = GET_CHAN_INDEX(m->ipid);
+
+ dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
+ umc_inst, ch_inst);
+
+ if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+ return NOTIFY_OK;
+ else
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block amdgpu_bad_page_nb = {
+ .notifier_call = amdgpu_bad_page_notifier,
+ .priority = MCE_PRIO_UC,
+};
+
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ /*
+ * Add the adev to the mce_adev_list.
+ * During mode2 reset, amdgpu device is temporarily
+ * removed from the mgpu_info list which can cause
+ * page retirement to fail.
+ * Use this list instead of mgpu_info to find the amdgpu
+ * device on which the UMC error was reported.
+ */
+ mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
+
+ /*
+ * Register the x86 notifier only once
+ * with MCE subsystem.
+ */
+ if (notifier_registered == false) {
+ mce_register_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = true;
+ }
+}
+#endif
+
+struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
+{
+ if (!adev)
+ return NULL;
+
+ return adev->psp.ras_context.ras;
+}
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
+{
+ if (!adev)
+ return -EINVAL;
+
+ adev->psp.ras_context.ras = ras_con;
+ return 0;
+}
+
+/* check if ras is supported on block, say, sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ int ret = 0;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (block >= AMDGPU_RAS_BLOCK_COUNT)
+ return 0;
+
+ ret = ras && (adev->ras_enabled & (1 << block));
+
+ /* For the special asic with mem ecc enabled but sram ecc
+ * not enabled, even if the ras block is not supported on
+ * .ras_enabled, if the asic supports poison mode and the
+ * ras block has ras configuration, it can be considered
+ * that the ras block supports ras function.
+ */
+ if (!ret &&
+ (block == AMDGPU_RAS_BLOCK__GFX ||
+ block == AMDGPU_RAS_BLOCK__SDMA ||
+ block == AMDGPU_RAS_BLOCK__VCN ||
+ block == AMDGPU_RAS_BLOCK__JPEG) &&
+ amdgpu_ras_is_poison_mode_supported(adev) &&
+ amdgpu_ras_get_ras_block(adev, block, 0))
+ ret = 1;
+
+ return ret;
+}
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ return 0;
+}
+
+
+/* Register each ip ras block into amdgpu ras */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj)
+{
+ struct amdgpu_ras_block_list *ras_node;
+ if (!adev || !ras_block_obj)
+ return -EINVAL;
+
+ ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
+ if (!ras_node)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ras_node->node);
+ ras_node->ras_obj = ras_block_obj;
+ list_add_tail(&ras_node->node, &adev->ras_list);
+
+ return 0;
+}
+
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
+{
+ if (!err_type_name)
+ return;
+
+ switch (err_type) {
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ sprintf(err_type_name, "correctable");
+ break;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ sprintf(err_type_name, "uncorrectable");
+ break;
+ default:
+ sprintf(err_type_name, "unknown");
+ break;
+ }
+}
+
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id)
+{
+ uint32_t err_status_lo_data, err_status_lo_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_lo, reg_entry->reg_lo);
+ err_status_lo_data = RREG32(err_status_lo_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
+ !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
+ return false;
+
+ *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
+
+ return true;
+}
+
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt)
+{
+ uint32_t err_status_hi_data, err_status_hi_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_hi, reg_entry->reg_hi);
+ err_status_hi_data = RREG32(err_status_hi_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
+ !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
+ /* keep the check here in case we need to refer to the result later */
+ dev_dbg(adev->dev, "Invalid err_info field\n");
+
+ /* read err count */
+ *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
+
+ return true;
+}
+
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count)
+{
+ uint32_t memory_id;
+ unsigned long err_cnt;
+ char err_type_name[16];
+ uint32_t i, j;
+
+ for (i = 0; i < reg_list_size; i++) {
+ /* query memory_id from err_status_lo */
+ if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
+ instance, &memory_id))
+ continue;
+
+ /* query err_cnt from err_status_hi */
+ if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
+ instance, &err_cnt) ||
+ !err_cnt)
+ continue;
+
+ *err_count += err_cnt;
+
+ /* log the errors */
+ amdgpu_ras_get_error_type_name(err_type, err_type_name);
+ if (!mem_list) {
+ /* memory_list is not supported */
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, memory_id);
+ } else {
+ for (j = 0; j < mem_list_size; j++) {
+ if (memory_id == mem_list[j].memory_id) {
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, mem_list[j].name);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance)
+{
+ uint32_t err_status_lo_offset, err_status_hi_offset;
+ uint32_t i;
+
+ for (i = 0; i < reg_list_size; i++) {
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_lo, reg_list[i].reg_lo);
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_hi, reg_list[i].reg_hi);
+ WREG32(err_status_lo_offset, 0);
+ WREG32(err_status_hi_offset, 0);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
new file mode 100644
index 000000000..ffb49b2d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -0,0 +1,770 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_RAS_H
+#define _AMDGPU_RAS_H
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include "ta_ras_if.h"
+#include "amdgpu_ras_eeprom.h"
+
+struct amdgpu_iv_entry;
+
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+/* position of instance value in sub_block_index of
+ * ta_ras_trigger_error_input, the sub block uses lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
+
+enum amdgpu_ras_block {
+ AMDGPU_RAS_BLOCK__UMC = 0,
+ AMDGPU_RAS_BLOCK__SDMA,
+ AMDGPU_RAS_BLOCK__GFX,
+ AMDGPU_RAS_BLOCK__MMHUB,
+ AMDGPU_RAS_BLOCK__ATHUB,
+ AMDGPU_RAS_BLOCK__PCIE_BIF,
+ AMDGPU_RAS_BLOCK__HDP,
+ AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ AMDGPU_RAS_BLOCK__DF,
+ AMDGPU_RAS_BLOCK__SMN,
+ AMDGPU_RAS_BLOCK__SEM,
+ AMDGPU_RAS_BLOCK__MP0,
+ AMDGPU_RAS_BLOCK__MP1,
+ AMDGPU_RAS_BLOCK__FUSE,
+ AMDGPU_RAS_BLOCK__MCA,
+ AMDGPU_RAS_BLOCK__VCN,
+ AMDGPU_RAS_BLOCK__JPEG,
+
+ AMDGPU_RAS_BLOCK__LAST
+};
+
+enum amdgpu_ras_mca_block {
+ AMDGPU_RAS_MCA_BLOCK__MP0 = 0,
+ AMDGPU_RAS_MCA_BLOCK__MP1,
+ AMDGPU_RAS_MCA_BLOCK__MPIO,
+ AMDGPU_RAS_MCA_BLOCK__IOHC,
+
+ AMDGPU_RAS_MCA_BLOCK__LAST
+};
+
+#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
+#define AMDGPU_RAS_MCA_BLOCK_COUNT AMDGPU_RAS_MCA_BLOCK__LAST
+#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
+
+enum amdgpu_ras_gfx_subblock {
+ /* CPC */
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF */
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG */
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS */
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI */
+ AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ */
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+ AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges) */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA */
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA */
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI */
+ AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP */
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD */
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank */
+ AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker */
+ AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ AMDGPU_RAS_BLOCK__GFX_MAX
+};
+
+enum amdgpu_ras_error_type {
+ AMDGPU_RAS_ERROR__NONE = 0,
+ AMDGPU_RAS_ERROR__PARITY = 1,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
+ AMDGPU_RAS_ERROR__POISON = 8,
+};
+
+enum amdgpu_ras_ret {
+ AMDGPU_RAS_SUCCESS = 0,
+ AMDGPU_RAS_FAIL,
+ AMDGPU_RAS_UE,
+ AMDGPU_RAS_CE,
+ AMDGPU_RAS_PT,
+};
+
+/* ras error status reisger fields */
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
+#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
+#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
+#define ERR_STATUS__ERR_CNT__SHIFT 0x17
+#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
+
+#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
+ ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
+
+#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
+ (adev->reg_offset[hwip][ip_inst][segment] + (reg))
+
+#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
+#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
+#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
+
+#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
+
+struct amdgpu_ras_err_status_reg_entry {
+ uint32_t hwip;
+ uint32_t ip_inst;
+ uint32_t seg_lo;
+ uint32_t reg_lo;
+ uint32_t seg_hi;
+ uint32_t reg_hi;
+ uint32_t reg_inst;
+ uint32_t flags;
+ const char *block_name;
+};
+
+struct amdgpu_ras_memory_id_entry {
+ uint32_t memory_id;
+ const char *name;
+};
+
+struct ras_common_if {
+ enum amdgpu_ras_block block;
+ enum amdgpu_ras_error_type type;
+ uint32_t sub_block_index;
+ char name[32];
+};
+
+#define MAX_UMC_CHANNEL_NUM 32
+
+struct ecc_info_per_ch {
+ uint16_t ce_count_lo_chip;
+ uint16_t ce_count_hi_chip;
+ uint64_t mca_umc_status;
+ uint64_t mca_umc_addr;
+ uint64_t mca_ceumc_addr;
+};
+
+struct umc_ecc_info {
+ struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
+
+ /* Determine smu ecctable whether support
+ * record correctable error address
+ */
+ int record_ce_addr_supported;
+};
+
+struct amdgpu_ras {
+ /* ras infrastructure */
+ /* for ras itself. */
+ uint32_t features;
+ struct list_head head;
+ /* sysfs */
+ struct device_attribute features_attr;
+ struct bin_attribute badpages_attr;
+ struct dentry *de_ras_eeprom_table;
+ /* block array */
+ struct ras_manager *objs;
+
+ /* gpu recovery */
+ struct work_struct recovery_work;
+ atomic_t in_recovery;
+ struct amdgpu_device *adev;
+ /* error handler data */
+ struct ras_err_handler_data *eh_data;
+ struct mutex recovery_lock;
+
+ uint32_t flags;
+ bool reboot;
+ struct amdgpu_ras_eeprom_control eeprom_control;
+
+ bool error_query_ready;
+
+ /* bad page count threshold */
+ uint32_t bad_page_cnt_threshold;
+
+ /* disable ras error count harvest in recovery */
+ bool disable_ras_err_cnt_harvest;
+
+ /* is poison mode supported */
+ bool poison_supported;
+
+ /* RAS count errors delayed work */
+ struct delayed_work ras_counte_delay_work;
+ atomic_t ras_ue_count;
+ atomic_t ras_ce_count;
+
+ /* record umc error info queried from smu */
+ struct umc_ecc_info umc_ecc;
+
+ /* Indicates smu whether need update bad channel info */
+ bool update_channel_flag;
+
+ /* Record special requirements of gpu reset caller */
+ uint32_t gpu_reset_flags;
+};
+
+struct ras_fs_data {
+ char sysfs_name[32];
+ char debugfs_name[32];
+};
+
+struct ras_err_data {
+ unsigned long ue_count;
+ unsigned long ce_count;
+ unsigned long err_addr_cnt;
+ struct eeprom_table_record *err_addr;
+};
+
+struct ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* the count of entries */
+ int count;
+ /* the space can place new entries */
+ int space_left;
+};
+
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+
+struct ras_manager {
+ struct ras_common_if head;
+ /* reference count */
+ int use;
+ /* ras block link */
+ struct list_head node;
+ /* the device */
+ struct amdgpu_device *adev;
+ /* sysfs */
+ struct device_attribute sysfs_attr;
+ int attr_inuse;
+
+ /* fs node name */
+ struct ras_fs_data fs_data;
+
+ /* IH data */
+ struct ras_ih_data ih_data;
+
+ struct ras_err_data err_data;
+};
+
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
+/* interfaces for IP */
+struct ras_fs_if {
+ struct ras_common_if head;
+ const char* sysfs_name;
+ char debugfs_name[32];
+};
+
+struct ras_query_if {
+ struct ras_common_if head;
+ unsigned long ue_count;
+ unsigned long ce_count;
+};
+
+struct ras_inject_if {
+ struct ras_common_if head;
+ uint64_t address;
+ uint64_t value;
+ uint32_t instance_mask;
+};
+
+struct ras_cure_if {
+ struct ras_common_if head;
+ uint64_t address;
+};
+
+struct ras_ih_if {
+ struct ras_common_if head;
+ ras_ih_cb cb;
+};
+
+struct ras_dispatch_if {
+ struct ras_common_if head;
+ struct amdgpu_iv_entry *entry;
+};
+
+struct ras_debug_if {
+ union {
+ struct ras_common_if head;
+ struct ras_inject_if inject;
+ };
+ int op;
+};
+
+struct amdgpu_ras_block_object {
+ struct ras_common_if ras_comm;
+
+ int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index);
+ int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ ras_ih_cb ras_cb;
+ const struct amdgpu_ras_block_hw_ops *hw_ops;
+};
+
+struct amdgpu_ras_block_hw_ops {
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
+ void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*query_ras_error_status)(struct amdgpu_device *adev);
+ void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+ void (*reset_ras_error_status)(struct amdgpu_device *adev);
+ bool (*query_poison_status)(struct amdgpu_device *adev);
+ bool (*handle_poison_consumption)(struct amdgpu_device *adev);
+};
+
+/* work flow
+ * vbios
+ * 1: ras feature enable (enabled by default)
+ * psp
+ * 2: ras framework init (in ip_init)
+ * IP
+ * 3: IH add
+ * 4: debugfs/sysfs create
+ * 5: query/inject
+ * 6: debugfs/sysfs remove
+ * 7: IH remove
+ * 8: feature disable
+ */
+
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info);
+
+/* error handling functions */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages);
+
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt);
+
+static inline enum ta_ras_block
+amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return TA_RAS_BLOCK__UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return TA_RAS_BLOCK__SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return TA_RAS_BLOCK__GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return TA_RAS_BLOCK__MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return TA_RAS_BLOCK__ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return TA_RAS_BLOCK__PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return TA_RAS_BLOCK__HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return TA_RAS_BLOCK__XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return TA_RAS_BLOCK__DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return TA_RAS_BLOCK__SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return TA_RAS_BLOCK__SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return TA_RAS_BLOCK__MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return TA_RAS_BLOCK__MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return TA_RAS_BLOCK__FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return TA_RAS_BLOCK__MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return TA_RAS_BLOCK__VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return TA_RAS_BLOCK__JPEG;
+ default:
+ WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
+ return TA_RAS_BLOCK__UMC;
+ }
+}
+
+static inline enum ta_ras_error_type
+amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
+ switch (error) {
+ case AMDGPU_RAS_ERROR__NONE:
+ return TA_RAS_ERROR__NONE;
+ case AMDGPU_RAS_ERROR__PARITY:
+ return TA_RAS_ERROR__PARITY;
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ return TA_RAS_ERROR__SINGLE_CORRECTABLE;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ return TA_RAS_ERROR__MULTI_UNCORRECTABLE;
+ case AMDGPU_RAS_ERROR__POISON:
+ return TA_RAS_ERROR__POISON;
+ default:
+ WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error);
+ return TA_RAS_ERROR__NONE;
+ }
+}
+
+/* called in ip_init and ip_fini */
+int amdgpu_ras_init(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_ras_fini(struct amdgpu_device *adev);
+int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
+
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable);
+
+int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable);
+
+int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
+
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
+ struct ras_query_if *info);
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+
+int amdgpu_ras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info);
+
+int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
+ struct ras_dispatch_if *info);
+
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+extern atomic_t amdgpu_ras_in_intr;
+
+static inline bool amdgpu_ras_intr_triggered(void)
+{
+ return !!atomic_read(&amdgpu_ras_in_intr);
+}
+
+static inline void amdgpu_ras_intr_cleared(void)
+{
+ atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev);
+
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
+
+const char *get_ras_block_str(struct ras_common_if *ras_block);
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
+struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
+
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id);
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt);
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
new file mode 100644
index 000000000..9d82701d3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -0,0 +1,1420 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras_eeprom.h"
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include <linux/bits.h>
+#include "atom.h"
+#include "amdgpu_eeprom.h"
+#include "amdgpu_atomfirmware.h"
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include "amdgpu_reset.h"
+
+/* These are memory addresses as would be seen by one or more EEPROM
+ * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
+ * set of EEPROM devices. They form a continuous memory space.
+ *
+ * The I2C device address includes the device type identifier, 1010b,
+ * which is a reserved value and indicates that this is an I2C EEPROM
+ * device. It also includes the top 3 bits of the 19 bit EEPROM memory
+ * address, namely bits 18, 17, and 16. This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
+ * address memory in a device or a device on the I2C bus, depending on
+ * the status of pins 1-3. See top of amdgpu_eeprom.c.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0 0x0
+#define EEPROM_I2C_MADDR_4 0x40000
+
+/*
+ * The 2 macros bellow represent the actual size in bytes that
+ * those entities occupy in the EEPROM memory.
+ * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
+ * uses uint64 to store 6b fields such as retired_page.
+ */
+#define RAS_TABLE_HEADER_SIZE 20
+#define RAS_TABLE_RECORD_SIZE 24
+
+/* Table hdr is 'AMDR' */
+#define RAS_TABLE_HDR_VAL 0x414d4452
+
+/* Bad GPU tag ‘BADG’ */
+#define RAS_TABLE_HDR_BAD 0x42414447
+
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* Assume 2-Mbit size EEPROM and take up the whole space. */
+#define RAS_TBL_SIZE_BYTES (256 * 1024)
+#define RAS_TABLE_START 0
+#define RAS_HDR_START RAS_TABLE_START
+#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
+#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+/*
+ * EEPROM Table structrue v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
+
+/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
+ * offset off of RAS_TABLE_START. That is, this is something you can
+ * add to control->i2c_address, and then tell I2C layer to read
+ * from/write to there. _N is the so called absolute index,
+ * because it starts right after the table header.
+ */
+#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
+ (_N) * RAS_TABLE_RECORD_SIZE)
+
+#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
+ (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
+
+/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
+ * of "fri", return the absolute record index off of the end of
+ * the table header.
+ */
+#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
+ (_C)->ras_max_record_count)
+
+#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
+
+static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
+ case IP_VERSION(11, 0, 7): /* Sienna cichlid */
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2): /* Aldebaran */
+ case IP_VERSION(13, 0, 10):
+ return true;
+ case IP_VERSION(13, 0, 6):
+ return (adev->gmc.is_app_apu) ? false : true;
+ default:
+ return false;
+ }
+}
+
+static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
+ struct amdgpu_ras_eeprom_control *control)
+{
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
+ u8 i2c_addr;
+
+ if (!control)
+ return false;
+
+ if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ /* The address given by VBIOS is an 8-bit, wire-format
+ * address, i.e. the most significant byte.
+ *
+ * Normalize it to a 19-bit EEPROM address. Remove the
+ * device type identifier and make it a 7-bit address;
+ * then make it a 19-bit EEPROM address. See top of
+ * amdgpu_eeprom.c.
+ */
+ i2c_addr = (i2c_addr & 0x0F) >> 1;
+ control->i2c_address = ((u32) i2c_addr) << 16;
+
+ return true;
+ }
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(11, 0, 2):
+ /* VEGA20 and ARCTURUS */
+ if (adev->asic_type == CHIP_VEGA20)
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else if (strnstr(atom_ctx->vbios_pn,
+ "D342",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(11, 0, 7):
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 2):
+ if (strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 0):
+ if (strnstr(atom_ctx->vbios_pn, "D707",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void
+__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+
+ pp[0] = cpu_to_le32(hdr->header);
+ pp[1] = cpu_to_le32(hdr->version);
+ pp[2] = cpu_to_le32(hdr->first_rec_offset);
+ pp[3] = cpu_to_le32(hdr->tbl_size);
+ pp[4] = cpu_to_le32(hdr->checksum);
+}
+
+static void
+__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+
+ hdr->header = le32_to_cpu(pp[0]);
+ hdr->version = le32_to_cpu(pp[1]);
+ hdr->first_rec_offset = le32_to_cpu(pp[2]);
+ hdr->tbl_size = le32_to_cpu(pp[3]);
+ hdr->checksum = le32_to_cpu(pp[4]);
+}
+
+static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
+{
+ u8 buf[RAS_TABLE_HEADER_SIZE];
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res;
+
+ memset(buf, 0, sizeof(buf));
+ __encode_table_header_to_buf(&control->tbl_hdr, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ DRM_ERROR("Failed to write EEPROM table header:%d", res);
+ } else if (res < RAS_TABLE_HEADER_SIZE) {
+ DRM_ERROR("Short write:%d out of %d\n",
+ res, RAS_TABLE_HEADER_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+static void
+__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
+ (((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
+ (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
+ pp[0] = cpu_to_le32(tmp);
+}
+
+static void
+__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = le32_to_cpu(pp[0]);
+ rai->rma_status = tmp & 0xFF;
+ rai->health_percent = (tmp >> 8) & 0xFF;
+ rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
+}
+
+static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u8 *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("Failed to alloc buf to write table ras info\n");
+ return -ENOMEM;
+ }
+
+ __encode_table_ras_info_to_buf(&control->tbl_rai, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ DRM_ERROR("Short write:%d out of %d\n",
+ res, RAS_TABLE_V2_1_INFO_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ kfree(buf);
+
+ return res;
+}
+
+static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
+
+ /* Header checksum, skip checksum field in the calculation */
+ sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
+ pp = (u8 *) &control->tbl_hdr;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
+
+ sz = sizeof(control->tbl_rai);
+ pp = (u8 *) &control->tbl_rai;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static int amdgpu_ras_eeprom_correct_header_tag(
+ struct amdgpu_ras_eeprom_control *control,
+ uint32_t header)
+{
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ u8 *hh;
+ int res;
+ u8 csum;
+
+ csum = -hdr->checksum;
+
+ hh = (void *) &hdr->header;
+ csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
+ hh = (void *) &header;
+ csum += hh[0] + hh[1] + hh[2] + hh[3];
+ csum = -csum;
+ mutex_lock(&control->ras_tbl_mutex);
+ hdr->header = header;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+/**
+ * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
+ * @control: pointer to control structure
+ *
+ * Reset the contents of the header of the RAS EEPROM table.
+ * Return 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u8 csum;
+ int res;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ hdr->header = RAS_TABLE_HDR_VAL;
+ if (adev->umc.ras &&
+ adev->umc.ras->set_eeprom_table_version)
+ adev->umc.ras->set_eeprom_table_version(hdr);
+ else
+ hdr->version = RAS_TABLE_VER_V1;
+
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+ /**
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully health.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+ }
+
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version == RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+
+ control->ras_num_recs = 0;
+ control->ras_fri = 0;
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
+
+ control->bad_channel_bitmap = 0;
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+
+ amdgpu_ras_debugfs_set_ret_size(control);
+
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+static void
+__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE foramt */
+ buf[i++] = record->err_type;
+
+ buf[i++] = record->bank;
+
+ tmp = cpu_to_le64(record->ts);
+ memcpy(buf + i, &tmp, 8);
+ i += 8;
+
+ tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+ i += 6;
+
+ buf[i++] = record->mem_channel;
+ buf[i++] = record->mcumc_id;
+
+ tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+}
+
+static void
+__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE foramt */
+ record->err_type = buf[i++];
+
+ record->bank = buf[i++];
+
+ memcpy(&tmp, buf + i, 8);
+ record->ts = le64_to_cpu(tmp);
+ i += 8;
+
+ memcpy(&tmp, buf + i, 6);
+ record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+ i += 6;
+
+ record->mem_channel = buf[i++];
+ record->mcumc_id = buf[i++];
+
+ memcpy(&tmp, buf + i, 6);
+ record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
+
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!__is_ras_eeprom_supported(adev) ||
+ !amdgpu_bad_page_threshold)
+ return false;
+
+ /* skip check eeprom table for VEGA20 Gaming */
+ if (!con)
+ return false;
+ else
+ if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
+ return false;
+
+ if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
+ if (amdgpu_bad_page_threshold == -1) {
+ dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
+ con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
+ dev_warn(adev->dev,
+ "But GPU can be operated due to bad_page_threshold = -1.\n");
+ return false;
+ } else {
+ dev_warn(adev->dev, "This GPU is in BAD status.");
+ dev_warn(adev->dev, "Please retire it or set a larger "
+ "threshold value when reloading driver.\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM
+ * @control: pointer to control structure
+ * @buf: pointer to buffer containing data to write
+ * @fri: start writing at this index
+ * @num: number of records to write
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ DRM_ERROR("Writing %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short write, return error.
+ */
+ DRM_ERROR("Wrote %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+static int
+amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ u32 a, b, i;
+ u8 *buf, *pp;
+ int res;
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Encode all of them in one go.
+ */
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __encode_table_record_to_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
+
+ /* a, first record index to write into.
+ * b, last record index to write into.
+ * a = first index to read (fri) + number of records in the table,
+ * b = a + @num - 1.
+ * Let N = control->ras_max_num_record_count, then we have,
+ * case 0: 0 <= a <= b < N,
+ * just append @num records starting at a;
+ * case 1: 0 <= a < N <= b,
+ * append (N - a) records starting at a, and
+ * append the remainder, b % N + 1, starting at 0.
+ * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
+ * case 2a: 0 <= a <= b < N
+ * append num records starting at a; and fix fri if b overwrote it,
+ * and since a <= b, if b overwrote it then a must've also,
+ * and if b didn't overwrite it, then a didn't also.
+ * case 2b: 0 <= b < a < N
+ * write num records starting at a, which wraps around 0=N
+ * and overwrite fri unconditionally. Now from case 2a,
+ * this means that b eclipsed fri to overwrite it and wrap
+ * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
+ * set fri = b + 1 (mod N).
+ * Now, since fri is updated in every case, except the trivial case 0,
+ * the number of records present in the table after writing, is,
+ * num_recs - 1 = b - fri (mod N), and we take the positive value,
+ * by adding an arbitrary multiple of N before taking the modulo N
+ * as shown below.
+ */
+ a = control->ras_fri + control->ras_num_recs;
+ b = a + num - 1;
+ if (b < control->ras_max_record_count) {
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ } else if (a < control->ras_max_record_count) {
+ u32 g0, g1;
+
+ g0 = control->ras_max_record_count - a;
+ g1 = b % control->ras_max_record_count + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ if (g1 > control->ras_fri)
+ control->ras_fri = g1 % control->ras_max_record_count;
+ } else {
+ a %= control->ras_max_record_count;
+ b %= control->ras_max_record_count;
+
+ if (a <= b) {
+ /* Note that, b - a + 1 = num. */
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ if (res)
+ goto Out;
+ if (b >= control->ras_fri)
+ control->ras_fri = (b + 1) % control->ras_max_record_count;
+ } else {
+ u32 g0, g1;
+
+ /* b < a, which means, we write from
+ * a to the end of the table, and from
+ * the start of the table to b.
+ */
+ g0 = control->ras_max_record_count - a;
+ g1 = b + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ control->ras_fri = g1 % control->ras_max_record_count;
+ }
+ }
+ control->ras_num_recs = 1 + (control->ras_max_record_count + b
+ - control->ras_fri)
+ % control->ras_max_record_count;
+Out:
+ kfree(buf);
+ return res;
+}
+
+static int
+amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u8 *buf, *pp, csum;
+ u32 buf_size;
+ int res;
+
+ /* Modify the header if it exceeds.
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_recs >= ras->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_recs, ras->bad_page_cnt_threshold);
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+ }
+
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ control->tbl_hdr.checksum = 0;
+
+ buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
+ res = -ENOMEM;
+ goto Out;
+ }
+
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_record_offset,
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ DRM_ERROR("EEPROM failed reading records:%d\n",
+ res);
+ goto Out;
+ } else if (res < buf_size) {
+ DRM_ERROR("EEPROM read %d out of %d bytes\n",
+ res, buf_size);
+ res = -EIO;
+ goto Out;
+ }
+
+ /**
+ * bad page records have been stored in eeprom,
+ * now calculate gpu health percent
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
+ control->ras_num_recs < ras->bad_page_cnt_threshold)
+ control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
+ control->ras_num_recs) * 100) /
+ ras->bad_page_cnt_threshold;
+
+ /* Recalc the checksum.
+ */
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+
+ csum += __calc_hdr_byte_sum(control);
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ /* avoid sign extension when assigning to "checksum" */
+ csum = -csum;
+ control->tbl_hdr.checksum = csum;
+ res = __write_table_header(control);
+ if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+Out:
+ kfree(buf);
+ return res;
+}
+
+/**
+ * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
+ * @control: pointer to control structure
+ * @record: array of records to append
+ * @num: number of records in @record array
+ *
+ * Append @num records to the table, calculate the checksum and write
+ * the table back to EEPROM. The maximum number of records that
+ * can be appended is between 1 and control->ras_max_record_count,
+ * regardless of how many records are already stored in the table.
+ *
+ * Return 0 on success or if EEPROM is not supported, -errno on error.
+ */
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ if (num == 0) {
+ DRM_ERROR("will not append 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_max_record_count) {
+ DRM_ERROR("cannot append %d records than the size of table %d\n",
+ num, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_eeprom_append_table(control, record, num);
+ if (!res)
+ res = amdgpu_ras_eeprom_update_header(control);
+ if (!res)
+ amdgpu_ras_debugfs_set_ret_size(control);
+
+ mutex_unlock(&control->ras_tbl_mutex);
+ return res;
+}
+
+/**
+ * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer
+ * @control: pointer to control structure
+ * @buf: pointer to buffer to read into
+ * @fri: first record index, start reading at this index, absolute index
+ * @num: number of records to read
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ DRM_ERROR("Reading %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short read, return error.
+ */
+ DRM_ERROR("Read %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+/**
+ * amdgpu_ras_eeprom_read -- read EEPROM
+ * @control: pointer to control structure
+ * @record: array of records to read into
+ * @num: number of records in @record
+ *
+ * Reads num records from the RAS table in EEPROM and
+ * writes the data into @record array.
+ *
+ * Returns 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int i, res;
+ u8 *buf, *pp;
+ u32 g0, g1;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ if (num == 0) {
+ DRM_ERROR("will not read 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_num_recs) {
+ DRM_ERROR("too many records to read:%d available:%d\n",
+ num, control->ras_num_recs);
+ return -EINVAL;
+ }
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Determine how many records to read, from the first record
+ * index, fri, to the end of the table, and from the beginning
+ * of the table, such that the total number of records is
+ * @num, and we handle wrap around when fri > 0 and
+ * fri + num > RAS_MAX_RECORD_COUNT.
+ *
+ * First we compute the index of the last element
+ * which would be fetched from each region,
+ * g0 is in [fri, fri + num - 1], and
+ * g1 is in [0, RAS_MAX_RECORD_COUNT - 1].
+ * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of
+ * the last element to fetch, we set g0 to _the number_
+ * of elements to fetch, @num, since we know that the last
+ * indexed to be fetched does not exceed the table.
+ *
+ * If, however, g0 >= RAS_MAX_RECORD_COUNT, then
+ * we set g0 to the number of elements to read
+ * until the end of the table, and g1 to the number of
+ * elements to read from the beginning of the table.
+ */
+ g0 = control->ras_fri + num - 1;
+ g1 = g0 % control->ras_max_record_count;
+ if (g0 < control->ras_max_record_count) {
+ g0 = num;
+ g1 = 0;
+ } else {
+ g0 = control->ras_max_record_count - control->ras_fri;
+ g1 += 1;
+ }
+
+ mutex_lock(&control->ras_tbl_mutex);
+ res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0);
+ if (res)
+ goto Out;
+ if (g1) {
+ res = __amdgpu_ras_eeprom_read(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ }
+
+ res = 0;
+
+ /* Read up everything? Then transform.
+ */
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __decode_table_record_from_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
+Out:
+ kfree(buf);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
+{
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ return RAS_MAX_RECORD_COUNT_V2_1;
+ else
+ return RAS_MAX_RECORD_COUNT;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[50];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ } else {
+ res = snprintf(data, sizeof(data), "%d bytes or %d records\n",
+ RAS_TBL_SIZE_BYTES, control->ras_max_record_count);
+ }
+
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
+
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
+
+ *pos += res;
+
+ return res;
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_size_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+static const char *tbl_hdr_str = " Signature Version FirstOffs Size Checksum\n";
+static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n";
+#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1)
+static const char *rec_hdr_str = "Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage\n";
+static const char *rec_hdr_fmt = "%5d 0x%05X %7s 0x%02X 0x%016llX 0x%012llX 0x%02X 0x%02X 0x%012llX\n";
+#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1)
+
+static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = {
+ "ignore",
+ "re",
+ "ue",
+};
+
+static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control)
+{
+ return strlen(tbl_hdr_str) + tbl_hdr_fmt_size +
+ strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs;
+}
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras,
+ eeprom_control);
+ struct dentry *de = ras->de_ras_eeprom_table;
+
+ if (de)
+ d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control);
+}
+
+static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control;
+ const size_t orig_size = size;
+ int res = -EFAULT;
+ size_t data_len;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ /* We want *pos - data_len > 0, which means there's
+ * bytes to be printed from data.
+ */
+ data_len = strlen(tbl_hdr_str);
+ if (*pos < data_len) {
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size;
+ if (*pos < data_len && size > 0) {
+ u8 data[tbl_hdr_fmt_size + 1];
+ loff_t lpos;
+
+ snprintf(data, sizeof(data), tbl_hdr_fmt,
+ control->tbl_hdr.header,
+ control->tbl_hdr.version,
+ control->tbl_hdr.first_rec_offset,
+ control->tbl_hdr.tbl_size,
+ control->tbl_hdr.checksum);
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str);
+ if (copy_to_user(buf, &data[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str);
+ if (*pos < data_len && size > 0) {
+ loff_t lpos;
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size;
+ if (copy_to_user(buf, &rec_hdr_str[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = amdgpu_ras_debugfs_table_size(control);
+ if (*pos < data_len && size > 0) {
+ u8 dare[RAS_TABLE_RECORD_SIZE];
+ u8 data[rec_hdr_fmt_size + 1];
+ struct eeprom_table_record record;
+ int s, r;
+
+ /* Find the starting record index
+ */
+ s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ s = s / rec_hdr_fmt_size;
+ r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ r = r % rec_hdr_fmt_size;
+
+ for ( ; size > 0 && s < control->ras_num_recs; s++) {
+ u32 ai = RAS_RI_TO_AI(control, s);
+ /* Read a single record
+ */
+ res = __amdgpu_ras_eeprom_read(control, dare, ai, 1);
+ if (res)
+ goto Out;
+ __decode_table_record_from_buf(control, &record, dare);
+ snprintf(data, sizeof(data), rec_hdr_fmt,
+ s,
+ RAS_INDEX_TO_OFFSET(control, ai),
+ record_err_type_str[record.err_type],
+ record.bank,
+ record.ts,
+ record.offset,
+ record.mem_channel,
+ record.mcumc_id,
+ record.retired_page);
+
+ data_len = min_t(size_t, rec_hdr_fmt_size - r, size);
+ if (copy_to_user(buf, &data[r], data_len)) {
+ res = -EFAULT;
+ goto Out;
+ }
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ r = 0;
+ }
+ }
+ res = 0;
+Out:
+ mutex_unlock(&control->ras_tbl_mutex);
+ return res < 0 ? res : orig_size - size;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[81];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
+
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
+
+ *pos += res;
+
+ return res;
+ } else {
+ return amdgpu_ras_debugfs_table_read(f, buf, size, pos);
+ }
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_table_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+/**
+ * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum
+ * @control: pointer to control structure
+ *
+ * Check the checksum of the stored in EEPROM RAS table.
+ *
+ * Return 0 if the checksum is correct,
+ * positive if it is not correct, and
+ * -errno on I/O error.
+ */
+static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int buf_size, res;
+ u8 csum, *buf, *pp;
+
+ if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+
+ buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("Out of memory checking RAS table checksum.\n");
+ return -ENOMEM;
+ }
+
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, buf_size);
+ if (res < buf_size) {
+ DRM_ERROR("Partial read for checksum, res:%d\n", res);
+ /* On partial reads, return -EIO.
+ */
+ if (res >= 0)
+ res = -EIO;
+ goto Out;
+ }
+
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+Out:
+ kfree(buf);
+ return res < 0 ? res : csum;
+}
+
+static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
+ return -ENOMEM;
+ }
+
+ /**
+ * EEPROM table V2_1 supports ras info,
+ * read EEPROM table ras info
+ */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
+ res = res >= 0 ? -EIO : res;
+ goto Out;
+ }
+
+ __decode_table_ras_info_from_buf(rai, buf);
+
+Out:
+ kfree(buf);
+ return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
+}
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
+ bool *exceed_err_limit)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res;
+
+ *exceed_err_limit = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_header_offset = RAS_HDR_START;
+ control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
+ mutex_init(&control->ras_tbl_mutex);
+
+ /* Read the table header from EEPROM address */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ if (res < RAS_TABLE_HEADER_SIZE) {
+ DRM_ERROR("Failed to read EEPROM table header, res:%d", res);
+ return res >= 0 ? -EIO : res;
+ }
+
+ __decode_table_header_from_buf(hdr, buf);
+
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ } else {
+ control->ras_num_recs = RAS_NUM_RECS(hdr);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
+ control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+
+ if (hdr->header == RAS_TABLE_HDR_VAL) {
+ DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
+ control->ras_num_recs);
+
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res)
+ DRM_ERROR("RAS table incorrect checksum or error:%d\n",
+ res);
+
+ /* Warn if we are at 90% of the threshold or above
+ */
+ if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_recs,
+ ras->bad_page_cnt_threshold);
+ } else if (hdr->header == RAS_TABLE_HDR_BAD &&
+ amdgpu_bad_page_threshold != 0) {
+ if (hdr->version == RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res)
+ DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
+ res);
+ if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
+ /* This means that, the threshold was increased since
+ * the last time the system was booted, and now,
+ * ras->bad_page_cnt_threshold - control->num_recs > 0,
+ * so that at least one more record can be saved,
+ * before the page count threshold is reached.
+ */
+ dev_info(adev->dev,
+ "records:%d threshold:%d, resetting "
+ "RAS table header signature",
+ control->ras_num_recs,
+ ras->bad_page_cnt_threshold);
+ res = amdgpu_ras_eeprom_correct_header_tag(control,
+ RAS_TABLE_HDR_VAL);
+ } else {
+ dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
+ control->ras_num_recs, ras->bad_page_cnt_threshold);
+ if (amdgpu_bad_page_threshold == -1) {
+ dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
+ res = 0;
+ } else {
+ *exceed_err_limit = true;
+ dev_err(adev->dev,
+ "RAS records:%d exceed threshold:%d, "
+ "GPU will not be initialized. Replace this GPU or increase the threshold",
+ control->ras_num_recs, ras->bad_page_cnt_threshold);
+ }
+ }
+ } else {
+ DRM_INFO("Creating a new EEPROM table");
+
+ res = amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ return res < 0 ? res : 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
new file mode 100644
index 000000000..6dfd667f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_RAS_EEPROM_H
+#define _AMDGPU_RAS_EEPROM_H
+
+#include <linux/i2c.h>
+
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+
+struct amdgpu_device;
+
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
+enum amdgpu_ras_eeprom_err_type {
+ AMDGPU_RAS_EEPROM_ERR_NA,
+ AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_COUNT,
+};
+
+struct amdgpu_ras_eeprom_table_header {
+ uint32_t header;
+ uint32_t version;
+ uint32_t first_rec_offset;
+ uint32_t tbl_size;
+ uint32_t checksum;
+} __packed;
+
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ u32 padding[64 - 1];
+} __packed;
+
+struct amdgpu_ras_eeprom_control {
+ struct amdgpu_ras_eeprom_table_header tbl_hdr;
+
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
+ /* Base I2C EEPPROM 19-bit memory address,
+ * where the table is located. For more information,
+ * see top of amdgpu_eeprom.c.
+ */
+ u32 i2c_address;
+
+ /* The byte offset off of @i2c_address
+ * where the table header is found,
+ * and where the records start--always
+ * right after the header.
+ */
+ u32 ras_header_offset;
+ u32 ras_info_offset;
+ u32 ras_record_offset;
+
+ /* Number of records in the table.
+ */
+ u32 ras_num_recs;
+
+ /* First record index to read, 0-based.
+ * Range is [0, num_recs-1]. This is
+ * an absolute index, starting right after
+ * the table header.
+ */
+ u32 ras_fri;
+
+ /* Maximum possible number of records
+ * we could store, i.e. the maximum capacity
+ * of the table.
+ */
+ u32 ras_max_record_count;
+
+ /* Protect table access via this mutex.
+ */
+ struct mutex ras_tbl_mutex;
+
+ /* Record channel info which occurred bad pages
+ */
+ u32 bad_channel_bitmap;
+};
+
+/*
+ * Represents single table record. Packed to be easily serialized into byte
+ * stream.
+ */
+struct eeprom_table_record {
+
+ union {
+ uint64_t address;
+ uint64_t offset;
+ };
+
+ uint64_t retired_page;
+ uint64_t ts;
+
+ enum amdgpu_ras_eeprom_err_type err_type;
+
+ union {
+ unsigned char bank;
+ unsigned char cu;
+ };
+
+ unsigned char mem_channel;
+ unsigned char mcumc_id;
+} __packed;
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
+ bool *exceed_err_limit);
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
+
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
+
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
+
+#endif // _AMDGPU_RAS_EEPROM_H
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
new file mode 100644
index 000000000..3c988cc40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#ifndef __AMDGPU_RES_CURSOR_H__
+#define __AMDGPU_RES_CURSOR_H__
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu_vram_mgr.h"
+
+/* state back for walking over vram_mgr and gtt_mgr allocations */
+struct amdgpu_res_cursor {
+ uint64_t start;
+ uint64_t size;
+ uint64_t remaining;
+ void *node;
+ uint32_t mem_type;
+};
+
+/**
+ * amdgpu_res_first - initialize a amdgpu_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void amdgpu_res_first(struct ttm_resource *res,
+ uint64_t start, uint64_t size,
+ struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
+ struct drm_mm_node *node;
+
+ if (!res)
+ goto fallback;
+
+ BUG_ON(start + size > res->size);
+
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= amdgpu_vram_mgr_block_size(block)) {
+ start -= amdgpu_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = amdgpu_vram_mgr_block_start(block) + start;
+ cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
+ cur->remaining = size;
+ cur->node = block;
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
+
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ cur->remaining = size;
+ cur->node = node;
+ break;
+ default:
+ goto fallback;
+ }
+
+ return;
+
+fallback:
+ cur->start = start;
+ cur->size = size;
+ cur->remaining = size;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->size);
+ return;
+}
+
+/**
+ * amdgpu_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forwrad, walking to the next node if necessary.
+ */
+static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
+{
+ struct drm_buddy_block *block;
+ struct drm_mm_node *node;
+ struct list_head *next;
+
+ BUG_ON(size > cur->remaining);
+
+ cur->remaining -= size;
+ if (!cur->remaining)
+ return;
+
+ cur->size -= size;
+ if (cur->size) {
+ cur->start += size;
+ return;
+ }
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ cur->node = block;
+ cur->start = amdgpu_vram_mgr_block_start(block);
+ cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ node = cur->node;
+
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ break;
+ default:
+ return;
+ }
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
new file mode 100644
index 000000000..5fed06ffc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_reset.h"
+#include "aldebaran.h"
+#include "sienna_cichlid.h"
+#include "smu_v13_0_10.h"
+
+int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_handler *handler)
+{
+ /* TODO: Check if handler exists? */
+ list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
+ return 0;
+}
+
+int amdgpu_reset_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ ret = aldebaran_reset_init(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_init(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_init(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_fini(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ ret = aldebaran_reset_fini(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_fini(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_fini(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ return reset_handler->prepare_hwcontext(adev->reset_cntl,
+ reset_context);
+}
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int ret;
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
+ if (ret)
+ return ret;
+
+ return reset_handler->restore_hwcontext(adev->reset_cntl,
+ reset_context);
+}
+
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref)
+{
+ struct amdgpu_reset_domain *reset_domain = container_of(ref,
+ struct amdgpu_reset_domain,
+ refcount);
+ if (reset_domain->wq)
+ destroy_workqueue(reset_domain->wq);
+
+ kvfree(reset_domain);
+}
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name)
+{
+ struct amdgpu_reset_domain *reset_domain;
+
+ reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
+ if (!reset_domain) {
+ DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
+ return NULL;
+ }
+
+ reset_domain->type = type;
+ kref_init(&reset_domain->refcount);
+
+ reset_domain->wq = create_singlethread_workqueue(wq_name);
+ if (!reset_domain->wq) {
+ DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
+ amdgpu_reset_put_reset_domain(reset_domain);
+ return NULL;
+
+ }
+
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ atomic_set(&reset_domain->reset_res, 0);
+ init_rwsem(&reset_domain->sem);
+
+ return reset_domain;
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 1);
+ down_write(&reset_domain->sem);
+}
+
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ up_write(&reset_domain->sem);
+}
+
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
new file mode 100644
index 000000000..f4a501ff8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RESET_H__
+#define __AMDGPU_RESET_H__
+
+#include "amdgpu.h"
+
+enum AMDGPU_RESET_FLAGS {
+
+ AMDGPU_NEED_FULL_RESET = 0,
+ AMDGPU_SKIP_HW_RESET = 1,
+ AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
+};
+
+struct amdgpu_reset_context {
+ enum amd_reset_method method;
+ struct amdgpu_device *reset_req_dev;
+ struct amdgpu_job *job;
+ struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
+ unsigned long flags;
+};
+
+struct amdgpu_reset_handler {
+ enum amd_reset_method reset_method;
+ struct list_head handler_list;
+ int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*perform_reset)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+
+ int (*do_reset)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_reset_control {
+ void *handle;
+ struct work_struct reset_work;
+ struct mutex reset_lock;
+ struct list_head reset_handlers;
+ atomic_t in_reset;
+ enum amd_reset_method active_reset;
+ struct amdgpu_reset_handler *(*get_reset_handler)(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ void (*async_reset)(struct work_struct *work);
+};
+
+
+enum amdgpu_reset_domain_type {
+ SINGLE_DEVICE,
+ XGMI_HIVE
+};
+
+struct amdgpu_reset_domain {
+ struct kref refcount;
+ struct workqueue_struct *wq;
+ enum amdgpu_reset_domain_type type;
+ struct rw_semaphore sem;
+ atomic_t in_gpu_reset;
+ atomic_t reset_res;
+};
+
+
+int amdgpu_reset_init(struct amdgpu_device *adev);
+int amdgpu_reset_fini(struct amdgpu_device *adev);
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_handler *handler);
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name);
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref);
+
+static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ return kref_get_unless_zero(&domain->refcount) != 0;
+}
+
+static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+}
+
+static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
+ struct work_struct *work)
+{
+ return queue_work(domain->wq, work);
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
new file mode 100644
index 000000000..80d6e132e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -0,0 +1,713 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ * Christian König
+ */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+
+/*
+ * Rings
+ * Most engines on the GPU are fed via ring buffers. Ring
+ * buffers are areas of GPU accessible memory that the host
+ * writes commands into and the GPU reads commands out of.
+ * There is a rptr (read pointer) that determines where the
+ * GPU is currently reading, and a wptr (write pointer)
+ * which determines where the host has written. When the
+ * pointers are equal, the ring is idle. When the host
+ * writes commands to the ring buffer, it increments the
+ * wptr. The GPU then starts fetching commands and executes
+ * them until the pointers are equal again.
+ */
+
+/**
+ * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
+ *
+ * @type: ring type for which to return the limit.
+ */
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
+{
+ switch (type) {
+ case AMDGPU_RING_TYPE_GFX:
+ /* Need to keep at least 192 on GFX7+ for old radv. */
+ return 192;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ return 125;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ return 16;
+ default:
+ return 49;
+ }
+}
+
+/**
+ * amdgpu_ring_alloc - allocate space on the ring buffer
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
+{
+ /* Align requested size with padding so unlock_commit can
+ * pad safely */
+ ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ /* Make sure we aren't trying to allocate more space
+ * than the maximum for one submission
+ */
+ if (WARN_ON_ONCE(ndw > ring->max_dw))
+ return -ENOMEM;
+
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
+
+ if (ring->funcs->begin_use)
+ ring->funcs->begin_use(ring);
+
+ return 0;
+}
+
+/** amdgpu_ring_insert_nop - insert NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @count: the number of NOP packets to insert
+ *
+ * This is the generic insert_nop function for rings except SDMA
+ */
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: IB to add NOP packets to
+ *
+ * This is the generic pad_ib function for rings except SDMA
+ */
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ while (ib->length_dw & ring->funcs->align_mask)
+ ib->ptr[ib->length_dw++] = ring->funcs->nop;
+}
+
+/**
+ * amdgpu_ring_commit - tell the GPU to execute the new
+ * commands on the ring buffer
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Update the wptr (write pointer) to tell the GPU to
+ * execute new commands on the ring buffer (all asics).
+ */
+void amdgpu_ring_commit(struct amdgpu_ring *ring)
+{
+ uint32_t count;
+
+ /* We pad to match fetch size */
+ count = ring->funcs->align_mask + 1 -
+ (ring->wptr & ring->funcs->align_mask);
+ count %= ring->funcs->align_mask + 1;
+ ring->funcs->insert_nop(ring, count);
+
+ mb();
+ amdgpu_ring_set_wptr(ring);
+
+ if (ring->funcs->end_use)
+ ring->funcs->end_use(ring);
+}
+
+/**
+ * amdgpu_ring_undo - reset the wptr
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Reset the driver's copy of the wptr (all asics).
+ */
+void amdgpu_ring_undo(struct amdgpu_ring *ring)
+{
+ ring->wptr = ring->wptr_old;
+
+ if (ring->funcs->end_use)
+ ring->funcs->end_use(ring);
+}
+
+#define amdgpu_ring_get_gpu_addr(ring, offset) \
+ (ring->is_mes_queue ? \
+ (ring->mes_ctx->meta_data_gpu_addr + offset) : \
+ (ring->adev->wb.gpu_addr + offset * 4))
+
+#define amdgpu_ring_get_cpu_addr(ring, offset) \
+ (ring->is_mes_queue ? \
+ (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
+ (&ring->adev->wb.wb[offset]))
+
+/**
+ * amdgpu_ring_init - init driver ring struct.
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring structure holding ring information
+ * @max_dw: maximum number of dw for ring alloc
+ * @irq_src: interrupt source to use for this ring
+ * @irq_type: interrupt type to use for this ring
+ * @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
+ *
+ * Initialize the driver information for the selected ring (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score)
+{
+ int r;
+ int sched_hw_submission = amdgpu_sched_hw_submission;
+ u32 *num_sched;
+ u32 hw_ip;
+ unsigned int max_ibs_dw;
+
+ /* Set the hw submission limit higher for KIQ because
+ * it's used for a number of gfx/compute tasks by both
+ * KFD and KGD which may have outstanding fences and
+ * it doesn't really use the gpu scheduler anyway;
+ * KIQ tasks get submitted directly to the ring.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ sched_hw_submission = max(sched_hw_submission, 256);
+ else if (ring == &adev->sdma.instance[0].page)
+ sched_hw_submission = 256;
+
+ if (ring->adev == NULL) {
+ if (adev->num_rings >= AMDGPU_MAX_RINGS)
+ return -EINVAL;
+
+ ring->adev = adev;
+ ring->num_hw_submission = sched_hw_submission;
+ ring->sched_score = sched_score;
+ ring->vmid_wait = dma_fence_get_stub();
+
+ if (!ring->is_mes_queue) {
+ ring->idx = adev->num_rings++;
+ adev->rings[ring->idx] = ring;
+ }
+
+ r = amdgpu_fence_driver_init_ring(ring);
+ if (r)
+ return r;
+ }
+
+ if (ring->is_mes_queue) {
+ ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_RPTR_OFFS);
+ ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_WPTR_OFFS);
+ ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_FENCE_OFFS);
+ ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
+ ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_COND_EXE_OFFS);
+ } else {
+ r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
+ return r;
+ }
+ }
+
+ ring->fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
+ ring->fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
+
+ ring->rptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
+ ring->rptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);
+
+ ring->wptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
+ ring->wptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);
+
+ ring->trail_fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
+ ring->trail_fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
+
+ ring->cond_exe_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
+ ring->cond_exe_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
+
+ /* always set cond_exec_polling to CONTINUE */
+ *ring->cond_exe_cpu_addr = 1;
+
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
+
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
+
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+
+ ring->buf_mask = (ring->ring_size / 4) - 1;
+ ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
+ 0xffffffffffffffff : ring->buf_mask;
+
+ /* Allocate ring buffer */
+ if (ring->is_mes_queue) {
+ int offset = 0;
+
+ BUG_ON(ring->ring_size > PAGE_SIZE*4);
+
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_RING_OFFS);
+ ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ amdgpu_ring_clear_ring(ring);
+
+ } else if (ring->ring_obj == NULL) {
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring create failed\n", r);
+ return r;
+ }
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ ring->max_dw = max_dw;
+ ring->hw_prio = hw_prio;
+
+ if (!ring->no_scheduler) {
+ hw_ip = ring->funcs->type;
+ num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
+ &ring->sched;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ring_fini - tear down the driver ring struct.
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Tear down the driver information for the selected ring (all asics).
+ */
+void amdgpu_ring_fini(struct amdgpu_ring *ring)
+{
+
+ /* Not to finish a ring which is not initialized */
+ if (!(ring->adev) ||
+ (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
+ return;
+
+ ring->sched.ready = false;
+
+ if (!ring->is_mes_queue) {
+ amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
+
+ amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+ amdgpu_device_wb_free(ring->adev, ring->fence_offs);
+
+ amdgpu_bo_free_kernel(&ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
+ } else {
+ kfree(ring->fence_drv.fences);
+ }
+
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = NULL;
+ ring->me = 0;
+
+ if (!ring->is_mes_queue)
+ ring->adev->rings[ring->idx] = NULL;
+}
+
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @ring: ring to write to
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+/**
+ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
+ *
+ * @ring: ring to try the recovery on
+ * @vmid: VMID we try to get going again
+ * @fence: timedout fence
+ *
+ * Tries to get a ring proceeding again when it is stuck.
+ */
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence)
+{
+ unsigned long flags;
+
+ ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+
+ if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
+ return false;
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ while (!dma_fence_is_signaled(fence) &&
+ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+ ring->funcs->soft_recovery(ring, vmid);
+
+ return dma_fence_is_signaled(fence);
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+/* Layout of file is 12 bytes consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n-words of ring data
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ int r, i;
+ uint32_t value, result, early[3];
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ result = 0;
+
+ if (*pos < 12) {
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
+ early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
+ early[2] = ring->wptr & ring->buf_mask;
+ for (i = *pos / 4; i < 3 && size; i++) {
+ r = put_user(early[i], (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ }
+
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
+
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+
+ return result;
+}
+
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_ring_read,
+ .llseek = default_llseek
+};
+
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ volatile u32 *mqd;
+ int r;
+ uint32_t value, result;
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ result = 0;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+ if (r) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ while (size) {
+ if (*pos >= ring->mqd_size)
+ goto done;
+
+ value = mqd[*pos/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto done;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+
+done:
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ mqd = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ return r;
+
+ return result;
+}
+
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_mqd_read,
+ .llseek = default_llseek
+};
+
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+ struct amdgpu_ring *ring = data;
+
+ amdgpu_fence_driver_set_error(ring, val);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+ amdgpu_debugfs_ring_error, "%lld\n");
+
+#endif
+
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_ring_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
+
+ if (ring->mqd_obj) {
+ sprintf(name, "amdgpu_mqd_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_mqd_fops,
+ ring->mqd_size);
+ }
+
+ sprintf(name, "amdgpu_error_%s", ring->name);
+ debugfs_create_file(name, 0200, root, ring,
+ &amdgpu_debugfs_error_fops);
+
+#endif
+}
+
+/**
+ * amdgpu_ring_test_helper - tests ring and set sched readiness status
+ *
+ * @ring: ring to try the recovery on
+ *
+ * Tests ring and set sched readiness status
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
+ ring->name, r);
+ else
+ DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
+ ring->name);
+
+ ring->sched.ready = !r;
+ return r;
+}
+
+static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ memset(prop, 0, sizeof(*prop));
+
+ prop->mqd_gpu_addr = ring->mqd_gpu_addr;
+ prop->hqd_base_gpu_addr = ring->gpu_addr;
+ prop->rptr_gpu_addr = ring->rptr_gpu_addr;
+ prop->wptr_gpu_addr = ring->wptr_gpu_addr;
+ prop->queue_size = ring->ring_size;
+ prop->eop_gpu_addr = ring->eop_gpu_addr;
+ prop->use_doorbell = ring->use_doorbell;
+ prop->doorbell_index = ring->doorbell_index;
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
+
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+}
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_mqd *mqd_mgr;
+ struct amdgpu_mqd_prop prop;
+
+ amdgpu_ring_to_mqd_prop(ring, &prop);
+
+ ring->wptr = 0;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
+ else
+ mqd_mgr = &adev->mqds[ring->funcs->type];
+
+ return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
+}
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_end(ring);
+}
+
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
new file mode 100644
index 000000000..e2ab303ad
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -0,0 +1,451 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_RING_H__
+#define __AMDGPU_RING_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+struct amdgpu_job;
+struct amdgpu_vm;
+
+/* max number of rings */
+#define AMDGPU_MAX_RINGS 124
+#define AMDGPU_MAX_HWIP_RINGS 64
+#define AMDGPU_MAX_GFX_RINGS 2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
+#define AMDGPU_MAX_COMPUTE_RINGS 8
+#define AMDGPU_MAX_VCE_RINGS 3
+#define AMDGPU_MAX_UVD_ENC_RINGS 2
+
+enum amdgpu_ring_priority_level {
+ AMDGPU_RING_PRIO_0,
+ AMDGPU_RING_PRIO_1,
+ AMDGPU_RING_PRIO_DEFAULT = 1,
+ AMDGPU_RING_PRIO_2,
+ AMDGPU_RING_PRIO_MAX
+};
+
+/* some special values for the owner field */
+#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
+
+#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
+#define AMDGPU_FENCE_FLAG_INT (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
+
+#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
+
+#define AMDGPU_IB_POOL_SIZE (1024 * 1024)
+
+enum amdgpu_ring_type {
+ AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX,
+ AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE,
+ AMDGPU_RING_TYPE_SDMA = AMDGPU_HW_IP_DMA,
+ AMDGPU_RING_TYPE_UVD = AMDGPU_HW_IP_UVD,
+ AMDGPU_RING_TYPE_VCE = AMDGPU_HW_IP_VCE,
+ AMDGPU_RING_TYPE_UVD_ENC = AMDGPU_HW_IP_UVD_ENC,
+ AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
+ AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
+ AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_KIQ,
+ AMDGPU_RING_TYPE_MES
+};
+
+enum amdgpu_ib_pool_type {
+ /* Normal submissions to the top of the pipeline. */
+ AMDGPU_IB_POOL_DELAYED,
+ /* Immediate submissions to the bottom of the pipeline. */
+ AMDGPU_IB_POOL_IMMEDIATE,
+ /* Direct submission to the ring buffer during init and reset. */
+ AMDGPU_IB_POOL_DIRECT,
+
+ AMDGPU_IB_POOL_MAX
+};
+
+struct amdgpu_ib {
+ struct drm_suballoc *sa_bo;
+ uint32_t length_dw;
+ uint64_t gpu_addr;
+ uint32_t *ptr;
+ uint32_t flags;
+};
+
+struct amdgpu_sched {
+ u32 num_scheds;
+ struct drm_gpu_scheduler *sched[AMDGPU_MAX_HWIP_RINGS];
+};
+
+/*
+ * Fences.
+ */
+struct amdgpu_fence_driver {
+ uint64_t gpu_addr;
+ volatile uint32_t *cpu_addr;
+ /* sync_seq is protected by ring emission lock */
+ uint32_t sync_seq;
+ atomic_t last_seq;
+ bool initialized;
+ struct amdgpu_irq_src *irq_src;
+ unsigned irq_type;
+ struct timer_list fallback_timer;
+ unsigned num_fences_mask;
+ spinlock_t lock;
+ struct dma_fence **fences;
+};
+
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
+void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
+void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq_src,
+ unsigned irq_type);
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job,
+ unsigned flags);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout);
+bool amdgpu_fence_process(struct amdgpu_ring *ring);
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+ uint32_t wait_seq,
+ signed long timeout);
+unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
+
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+ ktime_t timestamp);
+
+/*
+ * Rings.
+ */
+
+/* provided by hw blocks that expose a ring buffer for commands */
+struct amdgpu_ring_funcs {
+ enum amdgpu_ring_type type;
+ uint32_t align_mask;
+ u32 nop;
+ bool support_64bit_ptrs;
+ bool no_user_fence;
+ bool secure_submission_supported;
+ unsigned extra_dw;
+
+ /* ring read/write ptr handling */
+ u64 (*get_rptr)(struct amdgpu_ring *ring);
+ u64 (*get_wptr)(struct amdgpu_ring *ring);
+ void (*set_wptr)(struct amdgpu_ring *ring);
+ /* validating and patching of IBs */
+ int (*parse_cs)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ /* constants to calculate how many DW are needed for an emit */
+ unsigned emit_frame_size;
+ unsigned emit_ib_size;
+ /* command emit functions */
+ void (*emit_ib)(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags);
+ void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned flags);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
+ void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr);
+ void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+ void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size);
+ /* testing functions */
+ int (*test_ring)(struct amdgpu_ring *ring);
+ int (*test_ib)(struct amdgpu_ring *ring, long timeout);
+ /* insert NOP packets */
+ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+ void (*insert_start)(struct amdgpu_ring *ring);
+ void (*insert_end)(struct amdgpu_ring *ring);
+ /* pad the indirect buffer to the necessary number of dw */
+ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
+ void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ /* note usage for clock and power gating */
+ void (*begin_use)(struct amdgpu_ring *ring);
+ void (*end_use)(struct amdgpu_ring *ring);
+ void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow, int vmid);
+ void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs);
+ void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+ void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+ void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+ void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+ bool secure);
+ /* Try to soft recover the ring to make the fence signal */
+ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
+ int (*preempt_ib)(struct amdgpu_ring *ring);
+ void (*emit_mem_sync)(struct amdgpu_ring *ring);
+ void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+ void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
+};
+
+struct amdgpu_ring {
+ struct amdgpu_device *adev;
+ const struct amdgpu_ring_funcs *funcs;
+ struct amdgpu_fence_driver fence_drv;
+ struct drm_gpu_scheduler sched;
+
+ struct amdgpu_bo *ring_obj;
+ volatile uint32_t *ring;
+ unsigned rptr_offs;
+ u64 rptr_gpu_addr;
+ volatile u32 *rptr_cpu_addr;
+ u64 wptr;
+ u64 wptr_old;
+ unsigned ring_size;
+ unsigned max_dw;
+ int count_dw;
+ uint64_t gpu_addr;
+ uint64_t ptr_mask;
+ uint32_t buf_mask;
+ u32 idx;
+ u32 xcc_id;
+ u32 xcp_id;
+ u32 me;
+ u32 pipe;
+ u32 queue;
+ struct amdgpu_bo *mqd_obj;
+ uint64_t mqd_gpu_addr;
+ void *mqd_ptr;
+ unsigned mqd_size;
+ uint64_t eop_gpu_addr;
+ u32 doorbell_index;
+ bool use_doorbell;
+ bool use_pollmem;
+ unsigned wptr_offs;
+ u64 wptr_gpu_addr;
+ volatile u32 *wptr_cpu_addr;
+ unsigned fence_offs;
+ u64 fence_gpu_addr;
+ volatile u32 *fence_cpu_addr;
+ uint64_t current_ctx;
+ char name[16];
+ u32 trail_seq;
+ unsigned trail_fence_offs;
+ u64 trail_fence_gpu_addr;
+ volatile u32 *trail_fence_cpu_addr;
+ unsigned cond_exe_offs;
+ u64 cond_exe_gpu_addr;
+ volatile u32 *cond_exe_cpu_addr;
+ unsigned vm_hub;
+ unsigned vm_inv_eng;
+ struct dma_fence *vmid_wait;
+ bool has_compute_vm_bug;
+ bool no_scheduler;
+ int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
+
+ /* used for mes */
+ bool is_mes_queue;
+ uint32_t hw_queue_id;
+ struct amdgpu_mes_ctx_data *mes_ctx;
+
+ bool is_sw_ring;
+ unsigned int entry_index;
+
+};
+
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
+#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
+#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0)
+#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
+#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
+#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
+#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
+#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
+#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
+#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
+#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
+#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
+#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
+#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
+
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+void amdgpu_ring_commit(struct amdgpu_ring *ring);
+void amdgpu_ring_undo(struct amdgpu_ring *ring);
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score);
+void amdgpu_ring_fini(struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t val0,
+ uint32_t reg1, uint32_t val1);
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence);
+
+static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
+ bool cond_exec)
+{
+ *ring->cond_exe_cpu_addr = cond_exec;
+}
+
+static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
+{
+ int i = 0;
+ while (i <= ring->buf_mask)
+ ring->ring[i++] = ring->funcs->nop;
+
+}
+
+static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
+{
+ if (ring->count_dw <= 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+ ring->ring[ring->wptr++ & ring->buf_mask] = v;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw--;
+}
+
+static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+ void *src, int count_dw)
+{
+ unsigned occupied, chunk1, chunk2;
+ void *dst;
+
+ if (unlikely(ring->count_dw < count_dw))
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
+ occupied = ring->wptr & ring->buf_mask;
+ dst = (void *)&ring->ring[occupied];
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+ chunk2 = count_dw - chunk1;
+ chunk1 <<= 2;
+ chunk2 <<= 2;
+
+ if (chunk1)
+ memcpy(dst, src, chunk1);
+
+ if (chunk2) {
+ src += chunk1;
+ dst = (void *)ring->ring;
+ memcpy(dst, src, chunk2);
+ }
+
+ ring->wptr += count_dw;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count_dw;
+}
+
+#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
+ (ring->is_mes_queue && ring->mes_ctx ? \
+ (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
+
+#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \
+ (ring->is_mes_queue && ring->mes_ctx ? \
+ (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
+ NULL)
+
+int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
+
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
+
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+ return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+ uint32_t value)
+{
+ ib->ptr[idx] = value;
+}
+
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
+ struct dma_fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f);
+int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000..e1ee1c711
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+static const struct ring_info {
+ unsigned int hw_pio;
+ const char *ring_name;
+} sw_ring_info[] = {
+ { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+ { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring)
+{
+ return ring->entry_index < mux->ring_entry_size ?
+ &mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packages on sw ring range[begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+ u64 start, end;
+ struct amdgpu_ring *real_ring = mux->real_ring;
+
+ start = s_start & ring->buf_mask;
+ end = s_end & ring->buf_mask;
+
+ if (start == end) {
+ DRM_ERROR("no more data copied from sw ring\n");
+ return;
+ }
+ if (start > end) {
+ amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
+ (ring->ring_size >> 2) - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+ } else {
+ amdgpu_ring_alloc(real_ring, end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
+ }
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e = NULL;
+ struct amdgpu_mux_chunk *chunk;
+ uint32_t seq, last_seq;
+ int i;
+
+ /*find low priority entries:*/
+ if (!mux->s_resubmit)
+ return;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ e = &mux->ring_entry[i];
+ break;
+ }
+ }
+
+ if (!e) {
+ DRM_ERROR("%s no low priority ring found\n", __func__);
+ return;
+ }
+
+ last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+ seq = mux->seqno_to_resubmit;
+ if (last_seq < seq) {
+ /*resubmit all the fences between (last_seq, seq]*/
+ list_for_each_entry(chunk, &e->list, entry) {
+ if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+ amdgpu_fence_update_start_timestamp(e->ring,
+ chunk->sync_seq,
+ ktime_get());
+ if (chunk->sync_seq ==
+ le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+ if (chunk->cntl_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_cntl(e->ring,
+ chunk->cntl_offset);
+ if (chunk->ce_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
+ if (chunk->de_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_de(e->ring, chunk->de_offset);
+ }
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+ chunk->start,
+ chunk->end);
+ mux->wptr_resubmit = chunk->end;
+ amdgpu_ring_commit(mux->real_ring);
+ }
+ }
+ }
+
+ del_timer(&mux->resubmit_timer);
+ mux->s_resubmit = false;
+}
+
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+ mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+
+ if (!spin_trylock(&mux->lock)) {
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ DRM_ERROR("reschedule resubmit\n");
+ return;
+ }
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+}
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size)
+{
+ mux->real_ring = ring;
+ mux->num_ring_entries = 0;
+
+ mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+ if (!mux->ring_entry)
+ return -ENOMEM;
+
+ mux->ring_entry_size = entry_size;
+ mux->s_resubmit = false;
+
+ amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
+ sizeof(struct amdgpu_mux_chunk), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_mux_chunk_slab) {
+ DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&mux->lock);
+ timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+ return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *chunk2;
+ int i;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+ kmem_cache_destroy(amdgpu_mux_chunk_slab);
+ kfree(mux->ring_entry);
+ mux->ring_entry = NULL;
+ mux->num_ring_entries = 0;
+ mux->ring_entry_size = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ if (mux->num_ring_entries >= mux->ring_entry_size) {
+ DRM_ERROR("add sw ring exceeding max entry size\n");
+ return -ENOENT;
+ }
+
+ e = &mux->ring_entry[mux->num_ring_entries];
+ ring->entry_index = mux->num_ring_entries;
+ e->ring = ring;
+
+ INIT_LIST_HEAD(&e->list);
+ mux->num_ring_entries += 1;
+ return 0;
+}
+
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+ struct amdgpu_mux_entry *e;
+
+ spin_lock(&mux->lock);
+
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+ amdgpu_mux_resubmit_chunks(mux);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* We could skip this set wptr as preemption in process. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ e->sw_cptr = e->sw_wptr;
+ /* Update cptr if the package already copied in resubmit functions */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+ e->sw_cptr = mux->wptr_resubmit;
+ e->sw_wptr = wptr;
+ e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+ /* Skip copying for the packages already resubmitted.*/
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ amdgpu_ring_commit(mux->real_ring);
+ } else {
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ }
+ spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ return 0;
+ }
+
+ return e->sw_wptr;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The return value of the readptr is not precise while the other rings could
+ * write data onto the real ring buffer.After overwriting on the real ring, we
+ * can not decide if our packages have been excuted or not read yet. However,
+ * this function is only called by the tools such as umr to collect the latest
+ * packages for the hang analysis. We assume the hang happens near our latest
+ * submit. Thus we could use the following logic to give the clue:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ u64 readp, offset, start, end;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("no sw entry found!\n");
+ return 0;
+ }
+
+ readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+ start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ if (start > end) {
+ if (readp <= end)
+ readp += mux->real_ring->ring_size >> 2;
+ end += mux->real_ring->ring_size >> 2;
+ }
+
+ if (start <= readp && readp <= end) {
+ offset = readp - start;
+ e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+ } else if (readp < start) {
+ e->sw_rptr = e->sw_cptr;
+ } else {
+ /* end < readptr */
+ e->sw_rptr = e->sw_wptr;
+ }
+
+ return e->sw_rptr;
+}
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_rptr(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_wptr(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
+}
+
+/* Override insert_nop to prevent emitting nops to the software rings */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ WARN_ON(!ring->is_sw_ring);
+}
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_ring *ring;
+ int i, need_preempt;
+
+ need_preempt = 0;
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ ring = mux->ring_entry[i].ring;
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_count_emitted(ring) > 0)
+ return 0;
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_last_unsignaled_time_us(ring) >
+ AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
+ need_preempt = 1;
+ }
+ return need_preempt && !mux->s_resubmit;
+}
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+ int r;
+
+ spin_lock(&mux->lock);
+ mux->pending_trailing_fence_signaled = true;
+ r = amdgpu_ring_preempt_ib(mux->real_ring);
+ spin_unlock(&mux->lock);
+ return r;
+}
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+ if (amdgpu_mcbp_scan(mux) > 0)
+ amdgpu_mcbp_trigger_preempt(mux);
+ return;
+ }
+
+ amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+ amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+ unsigned offset;
+
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+
+ offset = ring->wptr & ring->buf_mask;
+
+ amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ spin_lock(&mux->lock);
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+ if (!chunk) {
+ DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+ return;
+ }
+
+ chunk->start = ring->wptr;
+ /* the initialized value used to check if they are set by the ib submission*/
+ chunk->cntl_offset = ring->buf_mask + 1;
+ chunk->de_offset = ring->buf_mask + 1;
+ chunk->ce_offset = ring->buf_mask + 1;
+ list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ uint32_t last_seq = 0;
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *tmp;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+ list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+ if (chunk->sync_seq <= last_seq) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+}
+
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring, u64 offset,
+ enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ switch (type) {
+ case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
+ chunk->cntl_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_DE:
+ chunk->de_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_CE:
+ chunk->ce_offset = offset;
+ break;
+ default:
+ DRM_ERROR("invalid type (%d)\n", type);
+ break;
+ }
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ chunk->end = ring->wptr;
+ chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+ scan_and_remove_signaled_chunk(mux, ring);
+}
+
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ if (!mux->pending_trailing_fence_signaled)
+ return false;
+
+ if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+ return false;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ ring = e->ring;
+ break;
+ }
+ }
+
+ if (!ring) {
+ DRM_ERROR("cannot find low priority ring\n");
+ return false;
+ }
+
+ amdgpu_fence_process(ring);
+ if (amdgpu_fence_count_emitted(ring) > 0) {
+ mux->s_resubmit = true;
+ mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ }
+
+ mux->pending_trailing_fence_signaled = false;
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000..d3186b570
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - the entry recording software rings copying information.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head for amdgpu_mux_chunk
+ */
+struct amdgpu_mux_entry {
+ struct amdgpu_ring *ring;
+ u64 start_ptr_in_hw_ring;
+ u64 end_ptr_in_hw_ring;
+ u64 sw_cptr;
+ u64 sw_rptr;
+ u64 sw_wptr;
+ struct list_head list;
+};
+
+enum amdgpu_ring_mux_offset_type {
+ AMDGPU_MUX_OFFSET_TYPE_CONTROL,
+ AMDGPU_MUX_OFFSET_TYPE_DE,
+ AMDGPU_MUX_OFFSET_TYPE_CE,
+};
+
+enum ib_complete_status {
+ /* IB not started/reset value, default value. */
+ IB_COMPLETION_STATUS_DEFAULT = 0,
+ /* IB preempted, started but not completed. */
+ IB_COMPLETION_STATUS_PREEMPTED = 1,
+ /* IB completed. */
+ IB_COMPLETION_STATUS_COMPLETED = 2,
+};
+
+struct amdgpu_ring_mux {
+ struct amdgpu_ring *real_ring;
+
+ struct amdgpu_mux_entry *ring_entry;
+ unsigned int num_ring_entries;
+ unsigned int ring_entry_size;
+ /*the lock for copy data from different software rings*/
+ spinlock_t lock;
+ bool s_resubmit;
+ uint32_t seqno_to_resubmit;
+ u64 wptr_resubmit;
+ struct timer_list resubmit_timer;
+
+ bool pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related with the saved IB.
+ * @start:- start location on the software ring.
+ * @end:- end location on the software ring.
+ * @control_offset:- the PRE_RESUME bit position used for resubmission.
+ * @de_offset:- the anchor in write_data for de meta of resubmission.
+ * @ce_offset:- the anchor in write_data for ce meta of resubmission.
+ */
+struct amdgpu_mux_chunk {
+ struct list_head entry;
+ uint32_t sync_seq;
+ u64 start;
+ u64 end;
+ u64 cntl_offset;
+ u64 de_offset;
+ u64 ce_offset;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ u64 offset, enum amdgpu_ring_mux_offset_type type);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
new file mode 100644
index 000000000..35e0ae9ac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_rlc.h"
+
+/**
+ * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
+ *
+ * @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
+ */
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ if (adev->gfx.rlc.in_safe_mode[xcc_id])
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = true;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
+ *
+ * @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
+ */
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = false;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_init_sr - Init save restore block
+ *
+ * @adev: amdgpu_device pointer
+ * @dws: the size of save restore block
+ *
+ * Allocate and setup value to save restore block of rlc.
+ * Returns 0 on succeess or negative error code if allocate failed.
+ */
+int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
+{
+ const u32 *src_ptr;
+ volatile u32 *dst_ptr;
+ u32 i;
+ int r;
+
+ /* allocate save restore block */
+ r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.save_restore_obj,
+ &adev->gfx.rlc.save_restore_gpu_addr,
+ (void **)&adev->gfx.rlc.sr_ptr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ /* write the sr buffer */
+ src_ptr = adev->gfx.rlc.reg_list;
+ dst_ptr = adev->gfx.rlc.sr_ptr;
+ for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
+ dst_ptr[i] = cpu_to_le32(src_ptr[i]);
+ amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_init_csb - Init clear state block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and setup value to clear state block of rlc.
+ * Returns 0 on succeess or negative error code if allocate failed.
+ */
+int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
+{
+ u32 dws;
+ int r;
+
+ /* allocate clear state block */
+ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
+ r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_init_cpt - Init cp table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and setup value to cp table of rlc.
+ * Returns 0 on succeess or negative error code if allocate failed.
+ */
+int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ /* set up the cp table */
+ amdgpu_gfx_rlc_setup_cp_table(adev);
+ amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_setup_cp_table - setup cp the buffer of cp table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Write cp firmware data into cp table.
+ */
+void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ volatile u32 *dst_ptr;
+ int me, i, max_me;
+ u32 bo_offset = 0;
+ u32 table_offset, table_size;
+
+ max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
+
+ /* write the cp table buffer */
+ dst_ptr = adev->gfx.rlc.cp_table_ptr;
+ for (me = 0; me < max_me; me++) {
+ if (me == 0) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 1) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 2) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 3) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 4) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec2_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ }
+
+ for (i = 0; i < table_size; i ++) {
+ dst_ptr[bo_offset + i] =
+ cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+ }
+
+ bo_offset += table_size;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_fini - Free BO which used for RLC
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free three BO which is used for rlc_save_restore_block, rlc_clear_state_block
+ * and rlc_jump_table_block.
+ */
+void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
+{
+ /* save restore block */
+ if (adev->gfx.rlc.save_restore_obj) {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
+ &adev->gfx.rlc.save_restore_gpu_addr,
+ (void **)&adev->gfx.rlc.sr_ptr);
+ }
+
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *common_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+ unsigned int *tmp;
+ unsigned int i;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+ return -ENOMEM;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ if (info->fw) {
+ common_hdr = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version);
+ adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version);
+ adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
+ adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
+
+ adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version);
+ adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version);
+ adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
+ adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor)
+{
+ int err;
+
+ if (version_major < 2) {
+ /* only support rlc_hdr v2.x and onwards */
+ dev_err(adev->dev, "unsupported rlc fw hdr\n");
+ return -EINVAL;
+ }
+
+ /* is_rlc_v2_1 is still used in APU code path */
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ if (version_minor >= 0) {
+ err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+ if (err) {
+ dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+ return err;
+ }
+ }
+ if (version_minor >= 1)
+ amdgpu_gfx_rlc_init_microcode_v2_1(adev);
+ if (version_minor >= 2)
+ amdgpu_gfx_rlc_init_microcode_v2_2(adev);
+ if (version_minor == 3)
+ amdgpu_gfx_rlc_init_microcode_v2_3(adev);
+ if (version_minor == 4)
+ amdgpu_gfx_rlc_init_microcode_v2_4(adev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
new file mode 100644
index 000000000..b591d33af
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -0,0 +1,275 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RLC_H__
+#define __AMDGPU_RLC_H__
+
+#include "clearstate_defs.h"
+
+#define AMDGPU_MAX_RLC_INSTANCES 8
+
+/* firmware ID used in rlc toc */
+typedef enum _FIRMWARE_ID_ {
+ FIRMWARE_ID_INVALID = 0,
+ FIRMWARE_ID_RLC_G_UCODE = 1,
+ FIRMWARE_ID_RLC_TOC = 2,
+ FIRMWARE_ID_RLCG_SCRATCH = 3,
+ FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ FIRMWARE_ID_RLC_SRM_INDEX_ADDR = 5,
+ FIRMWARE_ID_RLC_SRM_INDEX_DATA = 6,
+ FIRMWARE_ID_RLC_P_UCODE = 7,
+ FIRMWARE_ID_RLC_V_UCODE = 8,
+ FIRMWARE_ID_RLX6_UCODE = 9,
+ FIRMWARE_ID_RLX6_DRAM_BOOT = 10,
+ FIRMWARE_ID_GLOBAL_TAP_DELAYS = 11,
+ FIRMWARE_ID_SE0_TAP_DELAYS = 12,
+ FIRMWARE_ID_SE1_TAP_DELAYS = 13,
+ FIRMWARE_ID_GLOBAL_SE0_SE1_SKEW_DELAYS = 14,
+ FIRMWARE_ID_SDMA0_UCODE = 15,
+ FIRMWARE_ID_SDMA0_JT = 16,
+ FIRMWARE_ID_SDMA1_UCODE = 17,
+ FIRMWARE_ID_SDMA1_JT = 18,
+ FIRMWARE_ID_CP_CE = 19,
+ FIRMWARE_ID_CP_PFP = 20,
+ FIRMWARE_ID_CP_ME = 21,
+ FIRMWARE_ID_CP_MEC = 22,
+ FIRMWARE_ID_CP_MES = 23,
+ FIRMWARE_ID_MES_STACK = 24,
+ FIRMWARE_ID_RLC_SRM_DRAM_SR = 25,
+ FIRMWARE_ID_RLCG_SCRATCH_SR = 26,
+ FIRMWARE_ID_RLCP_SCRATCH_SR = 27,
+ FIRMWARE_ID_RLCV_SCRATCH_SR = 28,
+ FIRMWARE_ID_RLX6_DRAM_SR = 29,
+ FIRMWARE_ID_SDMA0_PG_CONTEXT = 30,
+ FIRMWARE_ID_SDMA1_PG_CONTEXT = 31,
+ FIRMWARE_ID_GLOBAL_MUX_SELECT_RAM = 32,
+ FIRMWARE_ID_SE0_MUX_SELECT_RAM = 33,
+ FIRMWARE_ID_SE1_MUX_SELECT_RAM = 34,
+ FIRMWARE_ID_ACCUM_CTRL_RAM = 35,
+ FIRMWARE_ID_RLCP_CAM = 36,
+ FIRMWARE_ID_RLC_SPP_CAM_EXT = 37,
+ FIRMWARE_ID_MAX = 38,
+} FIRMWARE_ID;
+
+typedef enum _SOC21_FIRMWARE_ID_ {
+ SOC21_FIRMWARE_ID_INVALID = 0,
+ SOC21_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC21_FIRMWARE_ID_RLC_TOC = 2,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC21_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC21_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC21_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC21_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC21_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC21_FIRMWARE_ID_CP_PFP = 13,
+ SOC21_FIRMWARE_ID_CP_ME = 14,
+ SOC21_FIRMWARE_ID_CP_MEC = 15,
+ SOC21_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC21_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC21_FIRMWARE_ID_RS64_PFP = 18,
+ SOC21_FIRMWARE_ID_RS64_ME = 19,
+ SOC21_FIRMWARE_ID_RS64_MEC = 20,
+ SOC21_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC21_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC21_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC21_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC21_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC21_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC21_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC21_FIRMWARE_ID_MAX = 37
+} SOC21_FIRMWARE_ID;
+
+typedef struct _RLC_TABLE_OF_CONTENT {
+ union {
+ unsigned int DW0;
+ struct {
+ unsigned int offset : 25;
+ unsigned int id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ unsigned int load_at_boot : 1;
+ unsigned int load_at_vddgfx : 1;
+ unsigned int load_at_reset : 1;
+ unsigned int memory_destination : 2;
+ unsigned int vfflr_image_code : 4;
+ unsigned int load_mode_direct : 1;
+ unsigned int save_for_vddgfx : 1;
+ unsigned int save_for_vfflr : 1;
+ unsigned int reserved : 1;
+ unsigned int signed_source : 1;
+ unsigned int size : 18;
+ };
+ };
+
+ union {
+ unsigned int DW2;
+ struct {
+ unsigned int indirect_addr_reg : 16;
+ unsigned int index : 16;
+ };
+ };
+
+ union {
+ unsigned int DW3;
+ struct {
+ unsigned int indirect_data_reg : 16;
+ unsigned int indirect_start_offset : 16;
+ };
+ };
+} RLC_TABLE_OF_CONTENT;
+
+#define RLC_TOC_MAX_SIZE 64
+
+struct amdgpu_rlc_funcs {
+ bool (*is_rlc_enabled)(struct amdgpu_device *adev);
+ void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ int (*init)(struct amdgpu_device *adev);
+ u32 (*get_csb_size)(struct amdgpu_device *adev);
+ void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
+ int (*get_cp_table_num)(struct amdgpu_device *adev);
+ int (*resume)(struct amdgpu_device *adev);
+ void (*stop)(struct amdgpu_device *adev);
+ void (*reset)(struct amdgpu_device *adev);
+ void (*start)(struct amdgpu_device *adev);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+ bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
+};
+
+struct amdgpu_rlcg_reg_access_ctrl {
+ uint32_t scratch_reg0;
+ uint32_t scratch_reg1;
+ uint32_t scratch_reg2;
+ uint32_t scratch_reg3;
+ uint32_t grbm_cntl;
+ uint32_t grbm_idx;
+ uint32_t spare_int;
+};
+
+struct amdgpu_rlc {
+ /* for power gating */
+ struct amdgpu_bo *save_restore_obj;
+ uint64_t save_restore_gpu_addr;
+ volatile uint32_t *sr_ptr;
+ const u32 *reg_list;
+ u32 reg_list_size;
+ /* for clear state */
+ struct amdgpu_bo *clear_state_obj;
+ uint64_t clear_state_gpu_addr;
+ volatile uint32_t *cs_ptr;
+ const struct cs_section_def *cs_data;
+ u32 clear_state_size;
+ /* for cp tables */
+ struct amdgpu_bo *cp_table_obj;
+ uint64_t cp_table_gpu_addr;
+ volatile uint32_t *cp_table_ptr;
+ u32 cp_table_size;
+
+ /* safe mode for updating CG/PG state */
+ bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
+ const struct amdgpu_rlc_funcs *funcs;
+
+ /* for firmware data */
+ u32 save_and_restore_offset;
+ u32 clear_state_descriptor_offset;
+ u32 avail_scratch_ram_locations;
+ u32 reg_restore_list_size;
+ u32 reg_list_format_start;
+ u32 reg_list_format_separate_start;
+ u32 starting_offsets_start;
+ u32 reg_list_format_size_bytes;
+ u32 reg_list_size_bytes;
+ u32 reg_list_format_direct_reg_list_length;
+ u32 save_restore_list_cntl_size_bytes;
+ u32 save_restore_list_gpm_size_bytes;
+ u32 save_restore_list_srm_size_bytes;
+ u32 rlc_iram_ucode_size_bytes;
+ u32 rlc_dram_ucode_size_bytes;
+ u32 rlcp_ucode_size_bytes;
+ u32 rlcv_ucode_size_bytes;
+ u32 global_tap_delays_ucode_size_bytes;
+ u32 se0_tap_delays_ucode_size_bytes;
+ u32 se1_tap_delays_ucode_size_bytes;
+ u32 se2_tap_delays_ucode_size_bytes;
+ u32 se3_tap_delays_ucode_size_bytes;
+
+ u32 *register_list_format;
+ u32 *register_restore;
+ u8 *save_restore_list_cntl;
+ u8 *save_restore_list_gpm;
+ u8 *save_restore_list_srm;
+ u8 *rlc_iram_ucode;
+ u8 *rlc_dram_ucode;
+ u8 *rlcp_ucode;
+ u8 *rlcv_ucode;
+ u8 *global_tap_delays_ucode;
+ u8 *se0_tap_delays_ucode;
+ u8 *se1_tap_delays_ucode;
+ u8 *se2_tap_delays_ucode;
+ u8 *se3_tap_delays_ucode;
+
+ bool is_rlc_v2_1;
+
+ /* for rlc autoload */
+ struct amdgpu_bo *rlc_autoload_bo;
+ u64 rlc_autoload_gpu_addr;
+ void *rlc_autoload_ptr;
+
+ /* rlc toc buffer */
+ struct amdgpu_bo *rlc_toc_bo;
+ uint64_t rlc_toc_gpu_addr;
+ void *rlc_toc_buf;
+
+ bool rlcg_reg_access_supported;
+ /* registers for rlcg indirect reg access */
+ struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES];
+};
+
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
+int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
+int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
new file mode 100644
index 000000000..10df73199
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression was is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such one exist. If none exist and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we wait on all the oldest fence of all
+ * rings. We just wait for any of those fence to complete.
+ */
+
+#include "amdgpu.h"
+
+int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager,
+ unsigned int size, u32 suballoc_align, u32 domain)
+{
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
+ &sa_manager->bo, &sa_manager->gpu_addr,
+ &sa_manager->cpu_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
+ return r;
+ }
+
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
+ return r;
+}
+
+void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager)
+{
+ if (sa_manager->bo == NULL) {
+ dev_err(adev->dev, "no bo for sa manager\n");
+ return;
+ }
+
+ drm_suballoc_manager_fini(&sa_manager->base);
+
+ amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+}
+
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size)
+{
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, false, 0);
+
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
+
+ return PTR_ERR(sa);
+ }
+
+ *sa_bo = sa;
+ return 0;
+}
+
+void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
+ struct dma_fence *fence)
+{
+ if (sa_bo == NULL || *sa_bo == NULL) {
+ return;
+ }
+
+ drm_suballoc_free(*sa_bo, fence);
+ *sa_bo = NULL;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
+ struct seq_file *m)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
new file mode 100644
index 000000000..863b2a34b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/pid.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_vm.h"
+
+static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
+ int fd,
+ int32_t priority)
+{
+ struct fd f = fdget(fd);
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx_mgr *mgr;
+ struct amdgpu_ctx *ctx;
+ uint32_t id;
+ int r;
+
+ if (!f.file)
+ return -EINVAL;
+
+ r = amdgpu_file_to_fpriv(f.file, &fpriv);
+ if (r) {
+ fdput(f);
+ return r;
+ }
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id)
+ amdgpu_ctx_priority_override(ctx, priority);
+ mutex_unlock(&mgr->lock);
+
+ fdput(f);
+ return 0;
+}
+
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+ int fd,
+ unsigned ctx_id,
+ int32_t priority)
+{
+ struct fd f = fdget(fd);
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx *ctx;
+ int r;
+
+ if (!f.file)
+ return -EINVAL;
+
+ r = amdgpu_file_to_fpriv(f.file, &fpriv);
+ if (r) {
+ fdput(f);
+ return r;
+ }
+
+ ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+ if (!ctx) {
+ fdput(f);
+ return -EINVAL;
+ }
+
+ amdgpu_ctx_priority_override(ctx, priority);
+ amdgpu_ctx_put(ctx);
+ fdput(f);
+
+ return 0;
+}
+
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_sched *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ /* First check the op, then the op's argument.
+ */
+ switch (args->in.op) {
+ case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
+ case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+ break;
+ default:
+ DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
+ return -EINVAL;
+ }
+
+ if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
+ WARN(1, "Invalid context priority %d\n", args->in.priority);
+ return -EINVAL;
+ }
+
+ switch (args->in.op) {
+ case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
+ r = amdgpu_sched_process_priority_override(adev,
+ args->in.fd,
+ args->in.priority);
+ break;
+ case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+ r = amdgpu_sched_context_priority_override(adev,
+ args->in.fd,
+ args->in.ctx_id,
+ args->in.priority);
+ break;
+ default:
+ /* Impossible.
+ */
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
new file mode 100644
index 000000000..67e5b2472
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#ifndef __AMDGPU_SCHED_H__
+#define __AMDGPU_SCHED_H__
+
+enum drm_sched_priority;
+
+struct drm_device;
+struct drm_file;
+
+int amdgpu_to_sched_priority(int amdgpu_priority,
+ enum drm_sched_priority *prio);
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif // __AMDGPU_SCHED_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
new file mode 100644
index 000000000..e2b9392d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+/* SDMA CSA reside in the 3rd page of CSA */
+#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+
+/*
+ * GPU SDMA IP block helpers function.
+ */
+
+struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (ring == &adev->sdma.instance[i].ring ||
+ ring == &adev->sdma.instance[i].page)
+ return &adev->sdma.instance[i];
+
+ return NULL;
+}
+
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring ||
+ ring == &adev->sdma.instance[i].page) {
+ *index = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t csa_mc_addr;
+ uint32_t index = 0;
+ int r;
+
+ /* don't enable OS preemption on SDMA under SRIOV */
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
+ return 0;
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ sdma[ring->idx].sdma_meta_data);
+ csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ } else {
+ r = amdgpu_sdma_get_index_from_ring(ring, &index);
+
+ if (r || index > 31)
+ csa_mc_addr = 0;
+ else
+ csa_mc_addr = amdgpu_csa_vaddr(adev) +
+ AMDGPU_CSA_SDMA_OFFSET +
+ index * AMDGPU_CSA_SDMA_SIZE;
+ }
+
+ return csa_mc_addr;
+}
+
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (r)
+ goto late_fini;
+ }
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_RAS_SUCCESS;
+
+ amdgpu_ras_reset_gpu(adev);
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ uint16_t version_major;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const struct sdma_firmware_header_v2_0 *hdr_v2;
+
+ header = (const struct common_firmware_header *)
+ sdma_inst->fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ switch (version_major) {
+ case 1:
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ break;
+ case 2:
+ hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ if (duplicate)
+ break;
+ }
+
+ memset((void *)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ u32 instance, bool duplicate)
+{
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ int err, i;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ uint16_t version_major;
+ char ucode_prefix[30];
+ char fw_name[40];
+
+ amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ if (instance == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%d.bin", ucode_prefix, instance);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, fw_name);
+ if (err)
+ goto out;
+
+ header = (const struct common_firmware_header *)
+ adev->sdma.instance[instance].fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ if ((duplicate && instance) || (!duplicate && version_major > 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]);
+ if (err)
+ goto out;
+
+ if (duplicate) {
+ for (i = 1; i < adev->sdma.num_instances; i++)
+ memcpy((void *)&adev->sdma.instance[i],
+ (void *)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (version_major) {
+ case 1:
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!duplicate && (instance != i))
+ continue;
+ else {
+ /* Use a single copy per SDMA firmware type. PSP uses the same instance for all
+ * groups of SDMAs */
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.num_inst_per_aid == i) {
+ break;
+ }
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+ break;
+ case 2:
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+
+out:
+ if (err)
+ amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
+ return err;
+}
+
+void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *sdma;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue) {
+ sdma = &adev->sdma.instance[i].page;
+ if (adev->mman.buffer_funcs_ring == sdma) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ break;
+ }
+ }
+ sdma = &adev->sdma.instance[i].ring;
+ if (adev->mman.buffer_funcs_ring == sdma) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ break;
+ }
+ }
+}
+
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_sdma_ras *ras = NULL;
+
+ /* adev->sdma.ras is NULL, which means sdma does not
+ * support ras function, then do nothing here.
+ */
+ if (!adev->sdma.ras)
+ return 0;
+
+ ras = adev->sdma.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register sdma ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "sdma");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if = &ras->ras_block.ras_comm;
+
+ /* If not define special ras_late_init function, use default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+ /* If not defined special ras_cb function, use default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
new file mode 100644
index 000000000..513ac2212
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SDMA_H__
+#define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
+
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES 16
+
+enum amdgpu_sdma_irq {
+ AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_SDMA_IRQ_INSTANCE2,
+ AMDGPU_SDMA_IRQ_INSTANCE3,
+ AMDGPU_SDMA_IRQ_INSTANCE4,
+ AMDGPU_SDMA_IRQ_INSTANCE5,
+ AMDGPU_SDMA_IRQ_INSTANCE6,
+ AMDGPU_SDMA_IRQ_INSTANCE7,
+ AMDGPU_SDMA_IRQ_INSTANCE8,
+ AMDGPU_SDMA_IRQ_INSTANCE9,
+ AMDGPU_SDMA_IRQ_INSTANCE10,
+ AMDGPU_SDMA_IRQ_INSTANCE11,
+ AMDGPU_SDMA_IRQ_INSTANCE12,
+ AMDGPU_SDMA_IRQ_INSTANCE13,
+ AMDGPU_SDMA_IRQ_INSTANCE14,
+ AMDGPU_SDMA_IRQ_INSTANCE15,
+ AMDGPU_SDMA_IRQ_LAST
+};
+
+#define NUM_SDMA(x) hweight32(x)
+
+struct amdgpu_sdma_instance {
+ /* SDMA firmware */
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_ring ring;
+ struct amdgpu_ring page;
+ bool burst_nop;
+ uint32_t aid_id;
+};
+
+enum amdgpu_sdma_ras_memory_id {
+ AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
+ AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
+ AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
+ AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
+ AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
+ AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
+ AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
+ AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
+ AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
+ AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
+ AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
+ AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
+ AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
+ AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
+ AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
+ AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
+ AMDGPU_SDMA_UCODE_BUF = 17,
+ AMDGPU_SDMA_RB_CMD_BUF = 18,
+ AMDGPU_SDMA_IB_CMD_BUF = 19,
+ AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
+ AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
+ AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
+ AMDGPU_SDMA_DATA_LUT_FIFO = 23,
+ AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
+ AMDGPU_SDMA_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_sdma_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_sdma {
+ struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_irq_src trap_irq;
+ struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src ecc_irq;
+ struct amdgpu_irq_src vm_hole_irq;
+ struct amdgpu_irq_src doorbell_invalid_irq;
+ struct amdgpu_irq_src pool_timeout_irq;
+ struct amdgpu_irq_src srbm_write_irq;
+
+ int num_instances;
+ uint32_t sdma_mask;
+ int num_inst_per_aid;
+ uint32_t srbm_soft_reset;
+ bool has_page_queue;
+ struct ras_common_if *ras_if;
+ struct amdgpu_sdma_ras *ras;
+};
+
+/*
+ * Provided by hw blocks that can move/clear data. e.g., gfx or sdma
+ * But currently, we use sdma to move data.
+ */
+struct amdgpu_buffer_funcs {
+ /* maximum bytes in a single operation */
+ uint32_t copy_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned copy_num_dw;
+
+ /* used for buffer migration */
+ void (*emit_copy_buffer)(struct amdgpu_ib *ib,
+ /* src addr in bytes */
+ uint64_t src_offset,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to transfer */
+ uint32_t byte_count,
+ bool tmz);
+
+ /* maximum bytes in a single operation */
+ uint32_t fill_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned fill_num_dw;
+
+ /* used for buffer clearing */
+ void (*emit_fill_buffer)(struct amdgpu_ib *ib,
+ /* value to write to memory */
+ uint32_t src_data,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to fill */
+ uint32_t byte_count);
+};
+
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
+
+struct amdgpu_sdma_instance *
+amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+ bool duplicate);
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate);
+void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
new file mode 100644
index 000000000..8ed0e0736
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_securedisplay.h"
+
+/**
+ * DOC: AMDGPU SECUREDISPLAY debugfs test interface
+ *
+ * how to use?
+ * echo opcode <value> > <debugfs_dir>/dri/xxx/securedisplay_test
+ * eg. echo 1 > <debugfs_dir>/dri/xxx/securedisplay_test
+ * eg. echo 2 phy_id > <debugfs_dir>/dri/xxx/securedisplay_test
+ *
+ * opcode:
+ * 1:Query whether TA is responding used only for validation pupose
+ * 2: Send region of Interest and CRC value to I2C. (uint32)phy_id is
+ * send to determine which DIO scratch register should be used to get
+ * ROI and receive i2c_buf as the output.
+ *
+ * You can refer more detail from header file ta_securedisplay_if.h
+ *
+ */
+
+void psp_securedisplay_parse_resp_status(struct psp_context *psp,
+ enum ta_securedisplay_status status)
+{
+ switch (status) {
+ case TA_SECUREDISPLAY_STATUS__SUCCESS:
+ break;
+ case TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE:
+ dev_err(psp->adev->dev, "Secure display: Generic Failure.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__INVALID_PARAMETER:
+ dev_err(psp->adev->dev, "Secure display: Invalid Parameter.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__NULL_POINTER:
+ dev_err(psp->adev->dev, "Secure display: Null Pointer.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__I2C_WRITE_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to write to I2C.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__READ_DIO_SCRATCH_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to Read DIO Scratch Register.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__READ_CRC_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to Read CRC");
+ break;
+ case TA_SECUREDISPLAY_STATUS__I2C_INIT_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to initialize I2C.");
+ break;
+ default:
+ dev_err(psp->adev->dev, "Secure display: Failed to parse status: %d\n", status);
+ }
+}
+
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
+ enum ta_securedisplay_command command_id)
+{
+ *cmd = (struct ta_securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf;
+ memset(*cmd, 0, sizeof(struct ta_securedisplay_cmd));
+ (*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE;
+ (*cmd)->cmd_id = command_id;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
+ struct drm_device *dev = adev_to_drm(adev);
+ uint32_t phy_id;
+ uint32_t op;
+ char str[64];
+ int ret;
+
+ if (*pos || size > sizeof(str) - 1)
+ return -EINVAL;
+
+ memset(str, 0, sizeof(str));
+ ret = copy_from_user(str, buf, size);
+ if (ret)
+ return -EFAULT;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+ }
+
+ if (size < 3)
+ sscanf(str, "%u ", &op);
+ else
+ sscanf(str, "%u %u", &op, &phy_id);
+
+ switch (op) {
+ case 1:
+ mutex_lock(&psp->securedisplay_context.mutex);
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ if (!ret) {
+ if (securedisplay_cmd->status == TA_SECUREDISPLAY_STATUS__SUCCESS)
+ dev_info(adev->dev, "SECUREDISPLAY: query securedisplay TA ret is 0x%X\n",
+ securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ else
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ }
+ mutex_unlock(&psp->securedisplay_context.mutex);
+ break;
+ case 2:
+ mutex_lock(&psp->securedisplay_context.mutex);
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id;
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ if (!ret) {
+ if (securedisplay_cmd->status == TA_SECUREDISPLAY_STATUS__SUCCESS) {
+ dev_info(adev->dev, "SECUREDISPLAY: I2C buffer out put is: %*ph\n",
+ TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
+ securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
+ } else {
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ }
+ }
+ mutex_unlock(&psp->securedisplay_context.mutex);
+ break;
+ default:
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ }
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return size;
+}
+
+static const struct file_operations amdgpu_securedisplay_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_securedisplay_debugfs_write,
+ .llseek = default_llseek
+};
+
+#endif
+
+void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+
+ if (!adev->psp.securedisplay_context.context.initialized)
+ return;
+
+ debugfs_create_file("securedisplay_test", S_IWUSR, adev_to_drm(adev)->primary->debugfs_root,
+ adev, &amdgpu_securedisplay_debugfs_ops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
new file mode 100644
index 000000000..456ad68ed
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_SECUREDISPLAY_H
+#define _AMDGPU_SECUREDISPLAY_H
+
+#include "amdgpu.h"
+#include "ta_secureDisplay_if.h"
+
+void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev);
+void psp_securedisplay_parse_resp_status(struct psp_context *psp,
+ enum ta_securedisplay_status status);
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
+ enum ta_securedisplay_command command_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
new file mode 100644
index 000000000..89c38d864
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_SMUIO_H__
+#define __AMDGPU_SMUIO_H__
+
+struct amdgpu_smuio_funcs {
+ u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
+ void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
+ u32 (*get_die_id)(struct amdgpu_device *adev);
+ u32 (*get_socket_id)(struct amdgpu_device *adev);
+ enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
+ bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_smuio {
+ const struct amdgpu_smuio_funcs *funcs;
+};
+
+#endif /* __AMDGPU_SMUIO_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
new file mode 100644
index 000000000..f4176cb01
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_SOCBB_H__
+#define __AMDGPU_SOCBB_H__
+
+struct gpu_info_voltage_scaling_v1_0 {
+ uint32_t state;
+ uint32_t dscclk_mhz;
+ uint32_t dcfclk_mhz;
+ uint32_t socclk_mhz;
+ uint32_t dram_speed_mts;
+ uint32_t fabricclk_mhz;
+ uint32_t dispclk_mhz;
+ uint32_t phyclk_mhz;
+ uint32_t dppclk_mhz;
+};
+
+struct gpu_info_soc_bounding_box_v1_0 {
+ uint32_t sr_exit_time_us;
+ uint32_t sr_enter_plus_exit_time_us;
+ uint32_t urgent_latency_us;
+ uint32_t urgent_latency_pixel_data_only_us;
+ uint32_t urgent_latency_pixel_mixed_with_vm_data_us;
+ uint32_t urgent_latency_vm_data_only_us;
+ uint32_t writeback_latency_us;
+ uint32_t ideal_dram_bw_after_urgent_percent;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+ uint32_t max_avg_sdp_bw_use_normal_percent;
+ uint32_t max_avg_dram_bw_use_normal_percent;
+ uint32_t max_request_size_bytes;
+ uint32_t downspread_percent;
+ uint32_t dram_page_open_time_ns;
+ uint32_t dram_rw_turnaround_time_ns;
+ uint32_t dram_return_buffer_per_channel_bytes;
+ uint32_t dram_channel_width_bytes;
+ uint32_t fabric_datapath_to_dcn_data_return_bytes;
+ uint32_t dcn_downspread_percent;
+ uint32_t dispclk_dppclk_vco_speed_mhz;
+ uint32_t dfs_vco_period_ps;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_vm_only_bytes;
+ uint32_t round_trip_ping_latency_dcfclk_cycles;
+ uint32_t urgent_out_of_order_return_per_channel_bytes;
+ uint32_t channel_interleave_bytes;
+ uint32_t num_banks;
+ uint32_t num_chans;
+ uint32_t vmm_page_size_bytes;
+ uint32_t dram_clock_change_latency_us;
+ uint32_t writeback_dram_clock_change_latency_us;
+ uint32_t return_bus_width_bytes;
+ uint32_t voltage_override;
+ uint32_t xfc_bus_transport_time_us;
+ uint32_t xfc_xbuf_latency_tolerance_us;
+ uint32_t use_urgent_burst_bw;
+ uint32_t num_states;
+ struct gpu_info_voltage_scaling_v1_0 clock_limits[8];
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
new file mode 100644
index 000000000..dcd8c066b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/dma-fence-chain.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+
+struct amdgpu_sync_entry {
+ struct hlist_node node;
+ struct dma_fence *fence;
+};
+
+static struct kmem_cache *amdgpu_sync_slab;
+
+/**
+ * amdgpu_sync_create - zero init sync object
+ *
+ * @sync: sync object to initialize
+ *
+ * Just clear the sync object for now.
+ */
+void amdgpu_sync_create(struct amdgpu_sync *sync)
+{
+ hash_init(sync->fences);
+}
+
+/**
+ * amdgpu_sync_same_dev - test if fence belong to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
+ struct dma_fence *f)
+{
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (s_fence) {
+ struct amdgpu_ring *ring;
+
+ ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
+ return ring->adev == adev;
+ }
+
+ return false;
+}
+
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @f: fence get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct dma_fence *f)
+{
+ struct drm_sched_fence *s_fence;
+ struct amdgpu_amdkfd_fence *kfd_fence;
+
+ if (!f)
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+
+ s_fence = to_drm_sched_fence(f);
+ if (s_fence)
+ return s_fence->owner;
+
+ kfd_fence = to_amdgpu_amdkfd_fence(f);
+ if (kfd_fence)
+ return AMDGPU_FENCE_OWNER_KFD;
+
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+}
+
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending which one is later.
+ */
+static void amdgpu_sync_keep_later(struct dma_fence **keep,
+ struct dma_fence *fence)
+{
+ if (*keep && dma_fence_is_later(*keep, fence))
+ return;
+
+ dma_fence_put(*keep);
+ *keep = dma_fence_get(fence);
+}
+
+/**
+ * amdgpu_sync_add_later - add the fence to the hash
+ *
+ * @sync: sync object to add the fence to
+ * @f: fence to add
+ *
+ * Tries to add the fence to an existing hash entry. Returns true when an entry
+ * was found, false otherwise.
+ */
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
+{
+ struct amdgpu_sync_entry *e;
+
+ hash_for_each_possible(sync->fences, e, node, f->context) {
+ if (unlikely(e->fence->context != f->context))
+ continue;
+
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_sync_fence - remember to sync to this fence
+ *
+ * @sync: sync object to add fence to
+ * @f: fence to sync to
+ *
+ * Add the fence to the sync object.
+ */
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+{
+ struct amdgpu_sync_entry *e;
+
+ if (!f)
+ return 0;
+
+ if (amdgpu_sync_add_later(sync, f))
+ return 0;
+
+ e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ hash_add(sync->fences, &e->node, f->context);
+ e->fence = dma_fence_get(f);
+ return 0;
+}
+
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+ enum amdgpu_sync_mode mode,
+ void *owner, struct dma_fence *f)
+{
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ /* Always sync to moves, no matter what */
+ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+ return true;
+
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Ignore fences depending on the sync mode */
+ switch (mode) {
+ case AMDGPU_SYNC_ALWAYS:
+ return true;
+
+ case AMDGPU_SYNC_NE_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner == owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EQ_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner != owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EXPLICIT:
+ return false;
+ }
+
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ return true;
+}
+
+/**
+ * amdgpu_sync_resv - sync to a reservation object
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fences from reservation object to
+ * @resv: reservation object with embedded fence
+ * @mode: how owner affects which fences we sync to
+ * @owner: owner of the planned job submission
+ *
+ * Sync to the fence
+ */
+int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ struct dma_resv *resv, enum amdgpu_sync_mode mode,
+ void *owner)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r;
+
+ if (resv == NULL)
+ return -EINVAL;
+
+ /* TODO: Use DMA_RESV_USAGE_READ here */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence *tmp = dma_fence_chain_contained(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
+ r = amdgpu_sync_fence(sync, f);
+ dma_fence_put(f);
+ if (r)
+ return r;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Free the entry back to the slab */
+static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
+{
+ hash_del(&e->node);
+ dma_fence_put(e->fence);
+ kmem_cache_free(amdgpu_sync_slab, e);
+}
+
+/**
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
+ *
+ * @sync: the sync object
+ * @ring: optional ring to use for test
+ *
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
+ */
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ struct dma_fence *f = e->fence;
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+ if (ring && s_fence) {
+ /* For fences from the same ring it is sufficient
+ * when they are scheduled.
+ */
+ if (s_fence->sched == &ring->sched) {
+ if (dma_fence_is_signaled(&s_fence->scheduled))
+ continue;
+
+ return &s_fence->scheduled;
+ }
+ }
+
+ return f;
+ }
+
+ return NULL;
+}
+
+/**
+ * amdgpu_sync_get_fence - get the next fence from the sync object
+ *
+ * @sync: sync object to use
+ *
+ * Get and removes the next fence from the sync object not signaled yet.
+ */
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+
+ f = e->fence;
+
+ hash_del(&e->node);
+ kmem_cache_free(amdgpu_sync_slab, e);
+
+ if (!dma_fence_is_signaled(f))
+ return f;
+
+ dma_fence_put(f);
+ }
+ return NULL;
+}
+
+/**
+ * amdgpu_sync_clone - clone a sync object
+ *
+ * @source: sync object to clone
+ * @clone: pointer to destination sync object
+ *
+ * Adds references to all unsignaled fences in @source to @clone. Also
+ * removes signaled fences from @source while at it.
+ */
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(source->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (!dma_fence_is_signaled(f)) {
+ r = amdgpu_sync_fence(clone, f);
+ if (r)
+ return r;
+ } else {
+ amdgpu_sync_entry_free(e);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_push_to_job - push fences into job
+ * @sync: sync object to get the fences from
+ * @job: job to push the fences into
+ *
+ * Add all unsignaled fences from sync to job.
+ */
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+
+ dma_fence_get(f);
+ r = drm_sched_job_add_dependency(&job->base, f);
+ if (r) {
+ dma_fence_put(f);
+ return r;
+ }
+ }
+ return 0;
+}
+
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ r = dma_fence_wait(e->fence, intr);
+ if (r)
+ return r;
+
+ amdgpu_sync_entry_free(e);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_free - free the sync object
+ *
+ * @sync: sync object to use
+ *
+ * Free the sync object.
+ */
+void amdgpu_sync_free(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ unsigned int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node)
+ amdgpu_sync_entry_free(e);
+}
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+ amdgpu_sync_slab = kmem_cache_create(
+ "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_sync_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+ kmem_cache_destroy(amdgpu_sync_slab);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
new file mode 100644
index 000000000..cf1e9e858
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_SYNC_H__
+#define __AMDGPU_SYNC_H__
+
+#include <linux/hashtable.h>
+
+struct dma_fence;
+struct dma_resv;
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_job;
+
+enum amdgpu_sync_mode {
+ AMDGPU_SYNC_ALWAYS,
+ AMDGPU_SYNC_NE_OWNER,
+ AMDGPU_SYNC_EQ_OWNER,
+ AMDGPU_SYNC_EXPLICIT
+};
+
+/*
+ * Container for fences used to sync command submissions.
+ */
+struct amdgpu_sync {
+ DECLARE_HASHTABLE(fences, 4);
+};
+
+void amdgpu_sync_create(struct amdgpu_sync *sync);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ struct dma_resv *resv, enum amdgpu_sync_mode mode,
+ void *owner);
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+ struct amdgpu_ring *ring);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
new file mode 100644
index 000000000..2fd1bfb35
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -0,0 +1,563 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#if !defined(_AMDGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _AMDGPU_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM amdgpu
+#define TRACE_INCLUDE_FILE amdgpu_trace
+
+#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
+ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
+
+TRACE_EVENT(amdgpu_device_rreg,
+ TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+ TP_ARGS(did, reg, value),
+ TP_STRUCT__entry(
+ __field(unsigned, did)
+ __field(uint32_t, reg)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->did = did;
+ __entry->reg = reg;
+ __entry->value = value;
+ ),
+ TP_printk("0x%04lx, 0x%08lx, 0x%08lx",
+ (unsigned long)__entry->did,
+ (unsigned long)__entry->reg,
+ (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_device_wreg,
+ TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+ TP_ARGS(did, reg, value),
+ TP_STRUCT__entry(
+ __field(unsigned, did)
+ __field(uint32_t, reg)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->did = did;
+ __entry->reg = reg;
+ __entry->value = value;
+ ),
+ TP_printk("0x%04lx, 0x%08lx, 0x%08lx",
+ (unsigned long)__entry->did,
+ (unsigned long)__entry->reg,
+ (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_iv,
+ TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv),
+ TP_ARGS(ih, iv),
+ TP_STRUCT__entry(
+ __field(unsigned, ih)
+ __field(unsigned, client_id)
+ __field(unsigned, src_id)
+ __field(unsigned, ring_id)
+ __field(unsigned, vmid)
+ __field(unsigned, vmid_src)
+ __field(uint64_t, timestamp)
+ __field(unsigned, timestamp_src)
+ __field(unsigned, pasid)
+ __array(unsigned, src_data, 4)
+ ),
+ TP_fast_assign(
+ __entry->ih = ih;
+ __entry->client_id = iv->client_id;
+ __entry->src_id = iv->src_id;
+ __entry->ring_id = iv->ring_id;
+ __entry->vmid = iv->vmid;
+ __entry->vmid_src = iv->vmid_src;
+ __entry->timestamp = iv->timestamp;
+ __entry->timestamp_src = iv->timestamp_src;
+ __entry->pasid = iv->pasid;
+ __entry->src_data[0] = iv->src_data[0];
+ __entry->src_data[1] = iv->src_data[1];
+ __entry->src_data[2] = iv->src_data[2];
+ __entry->src_data[3] = iv->src_data[3];
+ ),
+ TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u "
+ "timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
+ __entry->ih, __entry->client_id, __entry->src_id,
+ __entry->ring_id, __entry->vmid,
+ __entry->timestamp, __entry->pasid,
+ __entry->src_data[0], __entry->src_data[1],
+ __entry->src_data[2], __entry->src_data[3])
+);
+
+
+TRACE_EVENT(amdgpu_bo_create,
+ TP_PROTO(struct amdgpu_bo *bo),
+ TP_ARGS(bo),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(u32, pages)
+ __field(u32, type)
+ __field(u32, prefer)
+ __field(u32, allow)
+ __field(u32, visible)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo;
+ __entry->pages = PFN_UP(bo->tbo.resource->size);
+ __entry->type = bo->tbo.resource->mem_type;
+ __entry->prefer = bo->preferred_domains;
+ __entry->allow = bo->allowed_domains;
+ __entry->visible = bo->flags;
+ ),
+
+ TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d",
+ __entry->bo, __entry->pages, __entry->type,
+ __entry->prefer, __entry->allow, __entry->visible)
+);
+
+TRACE_EVENT(amdgpu_cs,
+ TP_PROTO(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib),
+ TP_ARGS(p, job, ib),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo_list *, bo_list)
+ __field(u32, ring)
+ __field(u32, dw)
+ __field(u32, fences)
+ ),
+
+ TP_fast_assign(
+ __entry->bo_list = p->bo_list;
+ __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx;
+ __entry->dw = ib->length_dw;
+ __entry->fences = amdgpu_fence_count_emitted(
+ to_amdgpu_ring(job->base.entity->rq->sched));
+ ),
+ TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
+ __entry->bo_list, __entry->ring, __entry->dw,
+ __entry->fences)
+);
+
+TRACE_EVENT(amdgpu_cs_ioctl,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(uint64_t, sched_job_id)
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(unsigned int, context)
+ __field(unsigned int, seqno)
+ __field(struct dma_fence *, fence)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->sched_job_id = job->base.id;
+ __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
+ __entry->sched_job_id, __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(uint64_t, sched_job_id)
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(unsigned int, context)
+ __field(unsigned int, seqno)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->sched_job_id = job->base.id;
+ __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
+ __entry->sched_job_id, __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
+);
+
+
+TRACE_EVENT(amdgpu_vm_grab_id,
+ TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job),
+ TP_ARGS(vm, ring, job),
+ TP_STRUCT__entry(
+ __field(u32, pasid)
+ __string(ring, ring->name)
+ __field(u32, ring)
+ __field(u32, vmid)
+ __field(u32, vm_hub)
+ __field(u64, pd_addr)
+ __field(u32, needs_flush)
+ ),
+
+ TP_fast_assign(
+ __entry->pasid = vm->pasid;
+ __assign_str(ring, ring->name);
+ __entry->vmid = job->vmid;
+ __entry->vm_hub = ring->vm_hub,
+ __entry->pd_addr = job->vm_pd_addr;
+ __entry->needs_flush = job->vm_needs_flush;
+ ),
+ TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+ __entry->pasid, __get_str(ring), __entry->vmid,
+ __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_map,
+ TP_PROTO(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(bo_va, mapping),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(long, start)
+ __field(long, last)
+ __field(u64, offset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
+ __entry->start = mapping->start;
+ __entry->last = mapping->last;
+ __entry->offset = mapping->offset;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->bo, __entry->start, __entry->last,
+ __entry->offset, __entry->flags)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_unmap,
+ TP_PROTO(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(bo_va, mapping),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(long, start)
+ __field(long, last)
+ __field(u64, offset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
+ __entry->start = mapping->start;
+ __entry->last = mapping->last;
+ __entry->offset = mapping->offset;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->bo, __entry->start, __entry->last,
+ __entry->offset, __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping),
+ TP_STRUCT__entry(
+ __field(u64, soffset)
+ __field(u64, eoffset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->soffset = mapping->start;
+ __entry->eoffset = mapping->last + 1;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("soffs=%010llx, eoffs=%010llx, flags=%llx",
+ __entry->soffset, __entry->eoffset, __entry->flags)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_update,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+TRACE_EVENT(amdgpu_vm_update_ptes,
+ TP_PROTO(struct amdgpu_vm_update_params *p,
+ uint64_t start, uint64_t end,
+ unsigned int nptes, uint64_t dst,
+ uint64_t incr, uint64_t flags,
+ pid_t pid, uint64_t vm_ctx),
+ TP_ARGS(p, start, end, nptes, dst, incr, flags, pid, vm_ctx),
+ TP_STRUCT__entry(
+ __field(u64, start)
+ __field(u64, end)
+ __field(u64, flags)
+ __field(unsigned int, nptes)
+ __field(u64, incr)
+ __field(pid_t, pid)
+ __field(u64, vm_ctx)
+ __dynamic_array(u64, dst, nptes)
+ ),
+
+ TP_fast_assign(
+ unsigned int i;
+
+ __entry->start = start;
+ __entry->end = end;
+ __entry->flags = flags;
+ __entry->incr = incr;
+ __entry->nptes = nptes;
+ __entry->pid = pid;
+ __entry->vm_ctx = vm_ctx;
+ for (i = 0; i < nptes; ++i) {
+ u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
+ p->pages_addr, dst) : dst;
+
+ ((u64 *)__get_dynamic_array(dst))[i] = addr;
+ dst += incr;
+ }
+ ),
+ TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
+ " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
+ __entry->vm_ctx, __entry->start, __entry->end,
+ __entry->flags, __entry->incr, __print_array(
+ __get_dynamic_array(dst), __entry->nptes, 8))
+);
+
+TRACE_EVENT(amdgpu_vm_set_ptes,
+ TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags, bool immediate),
+ TP_ARGS(pe, addr, count, incr, flags, immediate),
+ TP_STRUCT__entry(
+ __field(u64, pe)
+ __field(u64, addr)
+ __field(u32, count)
+ __field(u32, incr)
+ __field(u64, flags)
+ __field(bool, immediate)
+ ),
+
+ TP_fast_assign(
+ __entry->pe = pe;
+ __entry->addr = addr;
+ __entry->count = count;
+ __entry->incr = incr;
+ __entry->flags = flags;
+ __entry->immediate = immediate;
+ ),
+ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
+ "immediate=%d", __entry->pe, __entry->addr, __entry->incr,
+ __entry->flags, __entry->count, __entry->immediate)
+);
+
+TRACE_EVENT(amdgpu_vm_copy_ptes,
+ TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate),
+ TP_ARGS(pe, src, count, immediate),
+ TP_STRUCT__entry(
+ __field(u64, pe)
+ __field(u64, src)
+ __field(u32, count)
+ __field(bool, immediate)
+ ),
+
+ TP_fast_assign(
+ __entry->pe = pe;
+ __entry->src = src;
+ __entry->count = count;
+ __entry->immediate = immediate;
+ ),
+ TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d",
+ __entry->pe, __entry->src, __entry->count,
+ __entry->immediate)
+);
+
+TRACE_EVENT(amdgpu_vm_flush,
+ TP_PROTO(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr),
+ TP_ARGS(ring, vmid, pd_addr),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u32, vmid)
+ __field(u32, vm_hub)
+ __field(u64, pd_addr)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring, ring->name);
+ __entry->vmid = vmid;
+ __entry->vm_hub = ring->vm_hub;
+ __entry->pd_addr = pd_addr;
+ ),
+ TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
+ __get_str(ring), __entry->vmid,
+ __entry->vm_hub, __entry->pd_addr)
+);
+
+DECLARE_EVENT_CLASS(amdgpu_pasid,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid),
+ TP_STRUCT__entry(
+ __field(unsigned, pasid)
+ ),
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ ),
+ TP_printk("pasid=%u", __entry->pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
+TRACE_EVENT(amdgpu_bo_list_set,
+ TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
+ TP_ARGS(list, bo),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo_list *, list)
+ __field(struct amdgpu_bo *, bo)
+ __field(u64, bo_size)
+ ),
+
+ TP_fast_assign(
+ __entry->list = list;
+ __entry->bo = bo;
+ __entry->bo_size = amdgpu_bo_size(bo);
+ ),
+ TP_printk("list=%p, bo=%p, bo_size=%Ld",
+ __entry->list,
+ __entry->bo,
+ __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_cs_bo_status,
+ TP_PROTO(uint64_t total_bo, uint64_t total_size),
+ TP_ARGS(total_bo, total_size),
+ TP_STRUCT__entry(
+ __field(u64, total_bo)
+ __field(u64, total_size)
+ ),
+
+ TP_fast_assign(
+ __entry->total_bo = total_bo;
+ __entry->total_size = total_size;
+ ),
+ TP_printk("total_bo_size=%Ld, total_bo_count=%Ld",
+ __entry->total_bo, __entry->total_size)
+);
+
+TRACE_EVENT(amdgpu_bo_move,
+ TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement),
+ TP_ARGS(bo, new_placement, old_placement),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(u64, bo_size)
+ __field(u32, new_placement)
+ __field(u32, old_placement)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo;
+ __entry->bo_size = amdgpu_bo_size(bo);
+ __entry->new_placement = new_placement;
+ __entry->old_placement = old_placement;
+ ),
+ TP_printk("bo=%p, from=%d, to=%d, size=%Ld",
+ __entry->bo, __entry->old_placement,
+ __entry->new_placement, __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_ib_pipe_sync,
+ TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
+ TP_ARGS(sched_job, fence),
+ TP_STRUCT__entry(
+ __string(ring, sched_job->base.sched->name)
+ __field(uint64_t, id)
+ __field(struct dma_fence *, fence)
+ __field(uint64_t, ctx)
+ __field(unsigned, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring, sched_job->base.sched->name);
+ __entry->id = sched_job->base.id;
+ __entry->fence = fence;
+ __entry->ctx = fence->context;
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
+ __get_str(ring), __entry->id,
+ __entry->fence, __entry->ctx,
+ __entry->seqno)
+);
+
+TRACE_EVENT(amdgpu_reset_reg_dumps,
+ TP_PROTO(uint32_t address, uint32_t value),
+ TP_ARGS(address, value),
+ TP_STRUCT__entry(
+ __field(uint32_t, address)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->value = value;
+ ),
+ TP_printk("amdgpu register dump 0x%x: 0x%x",
+ __entry->address,
+ __entry->value)
+);
+
+#undef AMDGPU_JOB_GET_TIMELINE_NAME
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/amd/amdgpu
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
new file mode 100644
index 000000000..b96d885f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+/* Copyright Red Hat Inc 2010.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author : Dave Airlie <airlied@redhat.com>
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu_cs.h"
+#include "amdgpu.h"
+
+#define CREATE_TRACE_POINTS
+#include "amdgpu_trace.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
new file mode 100644
index 000000000..4e51dce3a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -0,0 +1,2541 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ * Dave Airlie
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
+#include <linux/module.h>
+
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_res_cursor.h"
+#include "bif/bif_4_1_d.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
+
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem);
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
+ struct ttm_tt *ttm);
+
+static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
+ unsigned int type,
+ uint64_t size_in_page)
+{
+ return ttm_range_man_init(&adev->mman.bdev, type,
+ false, size_in_page);
+}
+
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
+static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
+ struct ttm_placement *placement)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo;
+ static const struct ttm_place placements = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = TTM_PL_SYSTEM,
+ .flags = 0
+ };
+
+ /* Don't handle scatter gather BOs */
+ if (bo->type == ttm_bo_type_sg) {
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+ }
+
+ /* Object isn't an AMDGPU object so ignore */
+ if (!amdgpu_bo_is_amdgpu_bo(bo)) {
+ placement->placement = &placements;
+ placement->busy_placement = &placements;
+ placement->num_placement = 1;
+ placement->num_busy_placement = 1;
+ return;
+ }
+
+ abo = ttm_to_amdgpu_bo(bo);
+ if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+ }
+
+ switch (bo->resource->mem_type) {
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+
+ case TTM_PL_VRAM:
+ if (!adev->mman.buffer_funcs_enabled) {
+ /* Move to system memory */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+ amdgpu_bo_in_cpu_visible_vram(abo)) {
+
+ /* Try evicting to the CPU inaccessible part of VRAM
+ * first, but only set GTT as busy placement, so this
+ * BO will be evicted to GTT rather than causing other
+ * BOs to be evicted from VRAM
+ */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ abo->placements[0].lpfn = 0;
+ abo->placement.busy_placement = &abo->placements[1];
+ abo->placement.num_busy_placement = 1;
+ } else {
+ /* Move to GTT memory */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ }
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
+ default:
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ break;
+ }
+ *placement = abo->placement;
+}
+
+/**
+ * amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @bo: buffer object to map
+ * @mem: memory object to map
+ * @mm_cur: range to map
+ * @window: which GART window to use
+ * @ring: DMA ring to use for the copy
+ * @tmz: if we should setup a TMZ enabled mapping
+ * @size: in number of bytes to map, out number of bytes mapped
+ * @addr: resulting address inside the MC address space
+ *
+ * Setup one of the GART windows to access a specific piece of memory or return
+ * the physical address for local memory.
+ */
+static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+ struct ttm_resource *mem,
+ struct amdgpu_res_cursor *mm_cur,
+ unsigned int window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *size, uint64_t *addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int offset, num_pages, num_dw, num_bytes;
+ uint64_t src_addr, dst_addr;
+ struct amdgpu_job *job;
+ void *cpu_addr;
+ uint64_t flags;
+ unsigned int i;
+ int r;
+
+ BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+ AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+ if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
+ return -EINVAL;
+
+ /* Map only what can't be accessed directly */
+ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+ *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
+ mm_cur->start;
+ return 0;
+ }
+
+
+ /*
+ * If start begins at an offset inside the page, then adjust the size
+ * and addr accordingly
+ */
+ offset = mm_cur->start & ~PAGE_MASK;
+
+ num_pages = PFN_UP(*size + offset);
+ num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+
+ *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
+
+ *addr = adev->gmc.gart_start;
+ *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE;
+ *addr += offset;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, false);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
+ if (tmz)
+ flags |= AMDGPU_PTE_TMZ;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ if (mem->mem_type == TTM_PL_TT) {
+ dma_addr_t *dma_addr;
+
+ dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
+ amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
+ } else {
+ dma_addr_t dma_address;
+
+ dma_address = mm_cur->start;
+ dma_address += adev->vm_manager.vram_base_offset;
+
+ for (i = 0; i < num_pages; ++i) {
+ amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
+ flags, cpu_addr);
+ dma_address += PAGE_SIZE;
+ }
+ }
+
+ dma_fence_put(amdgpu_job_submit(job));
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * @adev: amdgpu device
+ * @src: buffer/address where to read from
+ * @dst: buffer/address where to write to
+ * @size: number of bytes to copy
+ * @tmz: if a secure copy should be used
+ * @resv: resv object to sync to
+ * @f: Returns the last fence if multiple jobs are submitted.
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor src_mm, dst_mm;
+ struct dma_fence *fence = NULL;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled) {
+ DRM_ERROR("Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_res_first(src->mem, src->offset, size, &src_mm);
+ amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (src_mm.remaining) {
+ uint64_t from, to, cur_size;
+ struct dma_fence *next;
+
+ /* Never copy more than 256MiB at once to avoid a timeout */
+ cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
+
+ /* Map src to window 0 and dst to window 1. */
+ r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+ 0, ring, tmz, &cur_size, &from);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+ 1, ring, tmz, &cur_size, &to);
+ if (r)
+ goto error;
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size,
+ resv, &next, false, true, tmz);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
+
+ amdgpu_res_next(&src_mm, cur_size);
+ amdgpu_res_next(&dst_mm, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+/*
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
+ * help move buffers to and from VRAM.
+ */
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem,
+ struct ttm_resource *old_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_copy_mem src, dst;
+ struct dma_fence *fence = NULL;
+ int r;
+
+ src.bo = bo;
+ dst.bo = bo;
+ src.mem = old_mem;
+ dst.mem = new_mem;
+ src.offset = 0;
+ dst.offset = 0;
+
+ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ new_mem->size,
+ amdgpu_bo_encrypted(abo),
+ bo->base.resv, &fence);
+ if (r)
+ goto error;
+
+ /* clear the space being freed */
+ if (old_mem->mem_type == TTM_PL_VRAM &&
+ (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+ struct dma_fence *wipe_fence = NULL;
+
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
+ false);
+ if (r) {
+ goto error;
+ } else if (wipe_fence) {
+ dma_fence_put(fence);
+ fence = wipe_fence;
+ }
+ }
+
+ /* Always block for VM page tables before committing the new location */
+ if (bo->type == ttm_bo_type_kernel)
+ r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
+ else
+ r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
+ dma_fence_put(fence);
+ return r;
+
+error:
+ if (fence)
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ return r;
+}
+
+/*
+ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ u64 mem_size = (u64)mem->size;
+ struct amdgpu_res_cursor cursor;
+ u64 end;
+
+ if (mem->mem_type == TTM_PL_SYSTEM ||
+ mem->mem_type == TTM_PL_TT)
+ return true;
+ if (mem->mem_type != TTM_PL_VRAM)
+ return false;
+
+ amdgpu_res_first(mem, 0, mem_size, &cursor);
+ end = cursor.start + cursor.size;
+ while (cursor.remaining) {
+ amdgpu_res_next(&cursor, cursor.size);
+
+ if (!cursor.remaining)
+ break;
+
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (end != cursor.start)
+ return false;
+
+ end = cursor.start + cursor.size;
+ }
+
+ return end <= adev->gmc.visible_vram_size;
+}
+
+/*
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
+static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *new_mem,
+ struct ttm_place *hop)
+{
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *abo;
+ struct ttm_resource *old_mem = bo->resource;
+ int r;
+
+ if (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT) {
+ r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
+ if (r)
+ return r;
+ }
+
+ abo = ttm_to_amdgpu_bo(bo);
+ adev = amdgpu_ttm_adev(bo->bdev);
+
+ if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
+ bo->ttm == NULL)) {
+ ttm_bo_move_null(bo, new_mem);
+ goto out;
+ }
+ if (old_mem->mem_type == TTM_PL_SYSTEM &&
+ (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ ttm_bo_move_null(bo, new_mem);
+ goto out;
+ }
+ if ((old_mem->mem_type == TTM_PL_TT ||
+ old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
+ new_mem->mem_type == TTM_PL_SYSTEM) {
+ r = ttm_bo_wait_ctx(bo, ctx);
+ if (r)
+ return r;
+
+ amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, new_mem);
+ goto out;
+ }
+
+ if (old_mem->mem_type == AMDGPU_PL_GDS ||
+ old_mem->mem_type == AMDGPU_PL_GWS ||
+ old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_GDS ||
+ new_mem->mem_type == AMDGPU_PL_GWS ||
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL) {
+ /* Nothing to save here */
+ ttm_bo_move_null(bo, new_mem);
+ goto out;
+ }
+
+ if (bo->type == ttm_bo_type_device &&
+ new_mem->mem_type == TTM_PL_VRAM &&
+ old_mem->mem_type != TTM_PL_VRAM) {
+ /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
+ * accesses the BO after it's moved.
+ */
+ abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ }
+
+ if (adev->mman.buffer_funcs_enabled) {
+ if (((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+
+ r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
+ } else {
+ r = -ENODEV;
+ }
+
+ if (r) {
+ /* Check that all memory is CPU accessible */
+ if (!amdgpu_mem_visible(adev, old_mem) ||
+ !amdgpu_mem_visible(adev, new_mem)) {
+ pr_err("Move buffer fallback to memcpy unavailable\n");
+ return r;
+ }
+
+ r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+ if (r)
+ return r;
+ }
+
+out:
+ /* update statistics */
+ atomic64_add(bo->base.size, &adev->num_bytes_moved);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
+static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
+ struct ttm_resource *mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ size_t bus_size = (size_t)mem->size;
+
+ switch (mem->mem_type) {
+ case TTM_PL_SYSTEM:
+ /* system memory */
+ return 0;
+ case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
+ break;
+ case TTM_PL_VRAM:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ /* check if it's visible */
+ if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
+ return -EINVAL;
+
+ if (adev->mman.aper_base_kaddr &&
+ mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+ mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+ mem->bus.offset;
+
+ mem->bus.offset += adev->gmc.aper_base;
+ mem->bus.is_iomem = true;
+ break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->doorbell.base;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_res_cursor cursor;
+
+ amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
+ &cursor);
+
+ if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+ return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+
+ return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
+}
+
+/**
+ * amdgpu_ttm_domain_start - Returns GPU start address
+ * @adev: amdgpu device object
+ * @type: type of the memory
+ *
+ * Returns:
+ * GPU start address of a memory domain
+ */
+
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
+{
+ switch (type) {
+ case TTM_PL_TT:
+ return adev->gmc.gart_start;
+ case TTM_PL_VRAM:
+ return adev->gmc.vram_start;
+ }
+
+ return 0;
+}
+
+/*
+ * TTM backend functions.
+ */
+struct amdgpu_ttm_tt {
+ struct ttm_tt ttm;
+ struct drm_gem_object *gobj;
+ u64 offset;
+ uint64_t userptr;
+ struct task_struct *usertask;
+ uint32_t userflags;
+ bool bound;
+ int32_t pool_id;
+};
+
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/*
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
+ *
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
+ */
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+ struct hmm_range **range)
+{
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ unsigned long start = gtt->userptr;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ bool readonly;
+ int r = 0;
+
+ /* Make sure get_user_pages_done() can cleanup gracefully */
+ *range = NULL;
+
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
+ }
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, start);
+ if (unlikely(!vma)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+
+ readonly = amdgpu_ttm_tt_is_readonly(ttm);
+ r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+ readonly, NULL, pages, range);
+out_unlock:
+ mmap_read_unlock(mm);
+ if (r)
+ pr_debug("failed %d to get user pages 0x%lx\n", r, start);
+
+ mmput(mm);
+
+ return r;
+}
+
+/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
+ */
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+ if (gtt && gtt->userptr && range)
+ amdgpu_hmm_range_get_pages_done(range);
+}
+
+/*
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if pages are still valid
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (!gtt || !gtt->userptr || !range)
+ return false;
+
+ DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
+ gtt->userptr, ttm->num_pages);
+
+ WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
+
+ return !amdgpu_hmm_range_get_pages_done(range);
+}
+#endif
+
+/*
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ *
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+{
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i] = pages ? pages[i] : NULL;
+}
+
+/*
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ **/
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ int r;
+
+ /* Allocate an SG array and squash pages into it */
+ r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
+ (u64)ttm->num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (r)
+ goto release_sg;
+
+ /* Map SG to device */
+ r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (r)
+ goto release_sg;
+
+ /* convert SG to linear array of pages and dma addresses */
+ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
+ ttm->num_pages);
+
+ return 0;
+
+release_sg:
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return r;
+}
+
+/*
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
+static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+ /* double check that we don't free the table twice */
+ if (!ttm->sg || !ttm->sg->sgl)
+ return;
+
+ /* unmap the pages mapped to the device */
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+ sg_free_table(ttm->sg);
+}
+
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD
+ * and uses memory type default, UC. The rest of pages_per_xcc are
+ * Ctrl stack and modify their memory type to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of the BO onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
+ }
+}
+
+static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+ struct ttm_tt *ttm = tbo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (amdgpu_bo_encrypted(abo))
+ flags |= AMDGPU_PTE_TMZ;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
+ } else {
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
+ }
+}
+
+/*
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ uint64_t flags;
+ int r;
+
+ if (!bo_mem)
+ return -EINVAL;
+
+ if (gtt->bound)
+ return 0;
+
+ if (gtt->userptr) {
+ r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
+ if (r) {
+ DRM_ERROR("failed to pin userptr\n");
+ return r;
+ }
+ } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
+ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
+ ttm->num_pages);
+ }
+
+ if (!ttm->num_pages) {
+ WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
+ ttm->num_pages, bo_mem, ttm);
+ }
+
+ if (bo_mem->mem_type != TTM_PL_TT ||
+ !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
+ gtt->offset = AMDGPU_BO_INVALID_OFFSET;
+ return 0;
+ }
+
+ /* compute PTE flags relevant to this BO memory */
+ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+ /* bind pages into GART page tables */
+ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
+ gtt->bound = true;
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
+ * through AGP or GART aperture.
+ *
+ * If bo is accessible through AGP aperture, then use AGP aperture
+ * to access bo; otherwise allocate logical space in GART aperture
+ * and map bo to GART aperture.
+ */
+int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
+ struct ttm_placement placement;
+ struct ttm_place placements;
+ struct ttm_resource *tmp;
+ uint64_t addr, flags;
+ int r;
+
+ if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
+ return 0;
+
+ addr = amdgpu_gmc_agp_addr(bo);
+ if (addr != AMDGPU_BO_INVALID_OFFSET) {
+ bo->resource->start = addr >> PAGE_SHIFT;
+ return 0;
+ }
+
+ /* allocate GART space */
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = bo->resource->placement;
+
+ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+ if (unlikely(r))
+ return r;
+
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
+
+ /* Bind pages */
+ gtt->offset = (u64)tmp->start << PAGE_SHIFT;
+ amdgpu_ttm_gart_bind(adev, bo, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, tmp);
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
+ uint64_t flags;
+
+ if (!tbo->ttm)
+ return;
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
+ amdgpu_ttm_gart_bind(adev, tbo, flags);
+}
+
+/*
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ /* if the pages have userptr pinning then clear that first */
+ if (gtt->userptr) {
+ amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
+ } else if (ttm->sg && gtt->gobj->import_attach) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ }
+
+ if (!gtt->bound)
+ return;
+
+ if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
+ return;
+
+ /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
+ amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
+ gtt->bound = false;
+}
+
+static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
+ ttm_tt_fini(&gtt->ttm);
+ kfree(gtt);
+}
+
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ * @page_flags: Page flags to be added to the ttm_tt object
+ *
+ * Called by ttm_tt_create().
+ */
+static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
+ uint32_t page_flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_ttm_tt *gtt;
+ enum ttm_caching caching;
+
+ gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
+ if (!gtt)
+ return NULL;
+
+ gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ caching = ttm_write_combined;
+ else
+ caching = ttm_cached;
+
+ /* allocate space for the uninitialized page entries */
+ if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
+ kfree(gtt);
+ return NULL;
+ }
+ return &gtt->ttm;
+}
+
+/*
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
+static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
+ pgoff_t i;
+ int ret;
+
+ /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
+ if (gtt->userptr) {
+ ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!ttm->sg)
+ return -ENOMEM;
+ return 0;
+ }
+
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return 0;
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = bdev->dev_mapping;
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
+static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct amdgpu_device *adev;
+ struct ttm_pool *pool;
+ pgoff_t i;
+
+ amdgpu_ttm_backend_unbind(bdev, ttm);
+
+ if (gtt->userptr) {
+ amdgpu_ttm_tt_set_user_pages(ttm, NULL);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = NULL;
+
+ adev = amdgpu_ttm_adev(bdev);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
+}
+
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current
+ * task
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: The returned value
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!tbo->ttm)
+ return -EINVAL;
+
+ gtt = (void *)tbo->ttm;
+ *user_addr = gtt->userptr;
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
+ * task
+ *
+ * @bo: The ttm_buffer_object to bind this userptr to
+ * @addr: The address in the current tasks VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
+ */
+int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
+ uint64_t addr, uint32_t flags)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!bo->ttm) {
+ /* TODO: We want a separate TTM object type for userptrs */
+ bo->ttm = amdgpu_ttm_tt_create(bo, 0);
+ if (bo->ttm == NULL)
+ return -ENOMEM;
+ }
+
+ /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
+ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
+
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
+ gtt->userptr = addr;
+ gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
+ */
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL)
+ return NULL;
+
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
+}
+
+/*
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
+ * address range for the current task.
+ *
+ */
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+ unsigned long end, unsigned long *userptr)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ unsigned long size;
+
+ if (gtt == NULL || !gtt->userptr)
+ return false;
+
+ /* Return false if no part of the ttm_tt object lies within
+ * the range
+ */
+ size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
+ if (gtt->userptr > end || gtt->userptr + size <= start)
+ return false;
+
+ if (userptr)
+ *userptr = gtt->userptr;
+ return true;
+}
+
+/*
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
+ */
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL || !gtt->userptr)
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
+bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL)
+ return false;
+
+ return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+}
+
+/**
+ * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PDE (Page Directory Entry).
+ */
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
+{
+ uint64_t flags = 0;
+
+ if (mem && mem->mem_type != TTM_PL_SYSTEM)
+ flags |= AMDGPU_PTE_VALID;
+
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
+ mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ flags |= AMDGPU_PTE_SYSTEM;
+
+ if (ttm->caching == ttm_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+ }
+
+ if (mem && mem->mem_type == TTM_PL_VRAM &&
+ mem->bus.caching == ttm_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+
+ return flags;
+}
+
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @adev: amdgpu_device pointer
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PTE (Page Table Entry).
+ */
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem)
+{
+ uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
+
+ flags |= adev->gart.gart_pte_flags;
+ flags |= AMDGPU_PTE_READABLE;
+
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ flags |= AMDGPU_PTE_WRITEABLE;
+
+ return flags;
+}
+
+/*
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
+ *
+ * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
+ * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
+ * it can find space for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
+static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
+ const struct ttm_place *place)
+{
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *f;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return ttm_bo_eviction_valuable(bo, place);
+
+ /* Swapout? */
+ if (bo->resource->mem_type == TTM_PL_SYSTEM)
+ return true;
+
+ if (bo->type == ttm_bo_type_kernel &&
+ !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
+ return false;
+
+ /* If bo is a KFD BO, check if the bo belongs to the current process.
+ * If true, then return false as any KFD process needs all its BOs to
+ * be resident to run successfully
+ */
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, f) {
+ if (amdkfd_fence_check_mm(f, current->mm))
+ return false;
+ }
+
+ /* Preemptible BOs don't own system resources managed by the
+ * driver (pages, VRAM, GART space). They point to resources
+ * owned by someone else (e.g. pageable memory in user mode
+ * or a DMABuf). They are used in a preemptible context so we
+ * can guarantee no deadlocks and good QoS in case of MMU
+ * notifiers or DMABuf move notifiers from the resource owner.
+ */
+ if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
+ return false;
+
+ if (bo->resource->mem_type == TTM_PL_TT &&
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
+ return false;
+
+ return ttm_bo_eviction_valuable(bo, place);
+}
+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ while (size) {
+ uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+ uint64_t bytes = 4 - (pos & 0x3);
+ uint32_t shift = (pos & 0x3) * 8;
+ uint32_t mask = 0xffffffff << shift;
+ uint32_t value = 0;
+
+ if (size < bytes) {
+ mask &= 0xffffffff >> (bytes - size) * 8;
+ bytes = size;
+ }
+
+ if (mask != 0xffffffff) {
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
+ if (write) {
+ value &= ~mask;
+ value |= (*(uint32_t *)buf << shift) & mask;
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
+ } else {
+ value = (value & mask) >> shift;
+ memcpy(buf, &value, bytes);
+ }
+ } else {
+ amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
+ }
+
+ pos += bytes;
+ buf += bytes;
+ size -= bytes;
+ }
+}
+
+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int len, int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor src_mm;
+ struct amdgpu_job *job;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ unsigned int num_dw;
+ int r, idx;
+
+ if (len != PAGE_SIZE)
+ return -EINVAL;
+
+ if (!adev->mman.sdma_access_ptr)
+ return -EACCES;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ if (write)
+ memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+ &job);
+ if (r)
+ goto out;
+
+ amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
+ src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+ src_mm.start;
+ dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+ if (write)
+ swap(src_addr, dst_addr);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+ PAGE_SIZE, false);
+
+ amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ fence = amdgpu_job_submit(job);
+
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ r = -ETIMEDOUT;
+ dma_fence_put(fence);
+
+ if (!(r || write))
+ memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write: true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
+static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf, int len,
+ int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor cursor;
+ int ret = 0;
+
+ if (bo->resource->mem_type != TTM_PL_VRAM)
+ return -EIO;
+
+ if (amdgpu_device_has_timeouts_enabled(adev) &&
+ !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+ return len;
+
+ amdgpu_res_first(bo->resource, offset, len, &cursor);
+ while (cursor.remaining) {
+ size_t count, size = cursor.size;
+ loff_t pos = cursor.start;
+
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* using MM to access rest vram and handle un-aligned address */
+ pos += count;
+ buf += count;
+ amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+ }
+
+ ret += cursor.size;
+ buf += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return ret;
+}
+
+static void
+amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
+{
+ amdgpu_bo_move_notify(bo, false, NULL);
+}
+
+static struct ttm_device_funcs amdgpu_bo_driver = {
+ .ttm_tt_create = &amdgpu_ttm_tt_create,
+ .ttm_tt_populate = &amdgpu_ttm_tt_populate,
+ .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
+ .ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
+ .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
+ .evict_flags = &amdgpu_evict_flags,
+ .move = &amdgpu_bo_move,
+ .delete_mem_notify = &amdgpu_bo_delete_mem_notify,
+ .release_notify = &amdgpu_bo_release_notify,
+ .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
+ .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+ .access_memory = &amdgpu_ttm_access_memory,
+};
+
+/*
+ * Firmware Reservation functions
+ */
+/**
+ * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free fw reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
+ NULL, &adev->mman.fw_vram_usage_va);
+}
+
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+ NULL,
+ &adev->mman.drv_vram_usage_va);
+}
+
+/**
+ * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from fw.
+ */
+static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
+{
+ uint64_t vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.fw_vram_usage_va = NULL;
+ adev->mman.fw_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.fw_vram_usage_size == 0 ||
+ adev->mman.fw_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size,
+ &adev->mman.fw_vram_usage_reserved_bo,
+ &adev->mman.fw_vram_usage_va);
+}
+
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+ u64 vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.drv_vram_usage_va = NULL;
+ adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.drv_vram_usage_size == 0 ||
+ adev->mman.drv_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.drv_vram_usage_start_offset,
+ adev->mman.drv_vram_usage_size,
+ &adev->mman.drv_vram_usage_reserved_bo,
+ &adev->mman.drv_vram_usage_va);
+}
+
+/*
+ * Memoy training reservation functions
+ */
+
+/**
+ * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free memory training reserved vram if it has been reserved.
+ */
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
+ ctx->c2p_bo = NULL;
+
+ return 0;
+}
+
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->c2p_train_data_offset =
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
+ ctx->p2c_train_data_offset =
+ (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size =
+ GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+}
+
+/*
+ * reserve TMR memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
+ */
+static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+ bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
+
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
+ if (amdgpu_atomfirmware_mem_training_supported(adev))
+ mem_train_support = true;
+ else
+ DRM_DEBUG("memory training does not support!\n");
+ }
+
+ /*
+ * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
+ * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
+ *
+ * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
+ * discovery data and G6 memory training data respectively
+ */
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
+
+ if (mem_train_support) {
+ /* reserve vram for mem train according to TMR location */
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
+ ret = amdgpu_bo_create_kernel_at(adev,
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ &ctx->c2p_bo,
+ NULL);
+ if (ret) {
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
+ }
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ }
+
+ if (!adev->gmc.is_app_apu) {
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size,
+ reserve_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
+ NULL, NULL);
+ return ret;
+ }
+ } else {
+ DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
+ }
+
+ return 0;
+}
+
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ false, false);
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
+/*
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
+int amdgpu_ttm_init(struct amdgpu_device *adev)
+{
+ uint64_t gtt_size;
+ int r;
+
+ mutex_init(&adev->mman.gtt_window_lock);
+
+ /* No others user of address space so set it to 0 */
+ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
+ adev_to_drm(adev)->anon_inode->i_mapping,
+ adev_to_drm(adev)->vma_offset_manager,
+ adev->need_swiotlb,
+ dma_addressing_limited(adev->dev));
+ if (r) {
+ DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ return r;
+ }
+
+ r = amdgpu_ttm_pools_init(adev);
+ if (r) {
+ DRM_ERROR("failed to init ttm pools(%d).\n", r);
+ return r;
+ }
+ adev->mman.initialized = true;
+
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ DRM_ERROR("Failed initializing VRAM heap.\n");
+ return r;
+ }
+
+ /* Change the size here instead of the init above so only lpfn is affected */
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_X86
+ if (adev->gmc.xgmi.connected_to_cpu)
+ adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
+ else
+#endif
+ adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+#endif
+
+ /*
+ *The reserved vram for firmware must be pinned to the specified
+ *place on the VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_fw_reserve_vram_init(adev);
+ if (r)
+ return r;
+
+ /*
+ *The reserved vram for driver must be pinned to the specified
+ *place on the VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_drv_reserve_vram_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * only NAVI10 and onwards ASIC support for IP discovery.
+ * If IP discovery enabled, a block of memory should be
+ * reserved for IP discovey.
+ */
+ if (adev->mman.discovery_bin) {
+ r = amdgpu_ttm_reserve_tmr(adev);
+ if (r)
+ return r;
+ }
+
+ /* allocate memory as required for VGA
+ * This is used for VGA emulation and pre-OS scanout buffers to
+ * avoid display artifacts while transitioning between pre-OS
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
+
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
+ } else {
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+ }
+
+ DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ if (amdgpu_gtt_size == -1)
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ else
+ gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ /* Initialize GTT memory pool */
+ r = amdgpu_gtt_mgr_init(adev, gtt_size);
+ if (r) {
+ DRM_ERROR("Failed initializing GTT heap.\n");
+ return r;
+ }
+ DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
+ (unsigned int)(gtt_size / (1024 * 1024)));
+
+ /* Initiailize doorbell pool on PCI BAR */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+ if (r) {
+ DRM_ERROR("Failed initializing doorbell heap.\n");
+ return r;
+ }
+
+ /* Create a boorbell page for kernel usages */
+ r = amdgpu_doorbell_create_kernel_doorbells(adev);
+ if (r) {
+ DRM_ERROR("Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize preemptible memory pool */
+ r = amdgpu_preempt_mgr_init(adev);
+ if (r) {
+ DRM_ERROR("Failed initializing PREEMPT heap.\n");
+ return r;
+ }
+
+ /* Initialize various on-chip memory pools */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
+ if (r) {
+ DRM_ERROR("Failed initializing GDS heap.\n");
+ return r;
+ }
+
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
+ if (r) {
+ DRM_ERROR("Failed initializing gws heap.\n");
+ return r;
+ }
+
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
+ if (r) {
+ DRM_ERROR("Failed initializing oa heap.\n");
+ return r;
+ }
+ if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr))
+ DRM_WARN("Debug VRAM access will use slowpath MM access\n");
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
+void amdgpu_ttm_fini(struct amdgpu_device *adev)
+{
+ int idx;
+
+ if (!adev->mman.initialized)
+ return;
+
+ amdgpu_ttm_pools_fini(adev);
+
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ /* return the stolen vga memory back to VRAM */
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
+ amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr);
+ amdgpu_ttm_fw_reserve_vram_fini(adev);
+ amdgpu_ttm_drv_reserve_vram_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ drm_dev_exit(idx);
+ }
+
+ amdgpu_vram_mgr_fini(adev);
+ amdgpu_gtt_mgr_fini(adev);
+ amdgpu_preempt_mgr_fini(adev);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_device_fini(&adev->mman.bdev);
+ adev->mman.initialized = false;
+ DRM_INFO("amdgpu: ttm finalized\n");
+}
+
+/**
+ * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true when we can use buffer functions.
+ *
+ * Enable/disable use of buffer functions during suspend/resume. This should
+ * only be called at bootup or when userspace isn't running.
+ */
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
+{
+ struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ uint64_t size;
+ int r;
+
+ if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
+ return;
+
+ if (enable) {
+ struct amdgpu_ring *ring;
+ struct drm_gpu_scheduler *sched;
+
+ ring = adev->mman.buffer_funcs_ring;
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->mman.high_pr,
+ DRM_SCHED_PRIORITY_KERNEL, &sched,
+ 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+ return;
+ }
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
+ } else {
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
+ dma_fence_put(man->move);
+ man->move = NULL;
+ }
+
+ /* this just adjusts TTM size idea, which sets lpfn to the correct value */
+ if (enable)
+ size = adev->gmc.real_vram_size;
+ else
+ size = adev->gmc.visible_vram_size;
+ man->size = size;
+ adev->mman.buffer_funcs_enabled = enable;
+
+ return;
+
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+}
+
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ bool direct_submit,
+ unsigned int num_dw,
+ struct dma_resv *resv,
+ bool vm_needs_flush,
+ struct amdgpu_job **job,
+ bool delayed)
+{
+ enum amdgpu_ib_pool_type pool = direct_submit ?
+ AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
+ int r;
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, pool, job);
+ if (r)
+ return r;
+
+ if (vm_needs_flush) {
+ (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+ adev->gmc.pdb0_bo :
+ adev->gart.bo);
+ (*job)->vm_needs_flush = true;
+ }
+ if (!resv)
+ return 0;
+
+ return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+}
+
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ uint64_t dst_offset, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence, bool direct_submit,
+ bool vm_needs_flush, bool tmz)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
+ struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
+ int r;
+
+ if (!direct_submit && !ring->sched.ready) {
+ DRM_ERROR("Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
+ num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+ resv, vm_needs_flush, &job, false);
+ if (r)
+ return r;
+
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
+ dst_offset, cur_size_in_bytes, tmz);
+
+ src_offset += cur_size_in_bytes;
+ dst_offset += cur_size_in_bytes;
+ byte_count -= cur_size_in_bytes;
+ }
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ if (direct_submit)
+ r = amdgpu_job_submit_direct(job, ring, fence);
+ else
+ *fence = amdgpu_job_submit(job);
+ if (r)
+ goto error_free;
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ return r;
+}
+
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+ uint64_t dst_addr, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool vm_needs_flush, bool delayed)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
+ struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
+ int r;
+
+ max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+ num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+ &job, delayed);
+ if (r)
+ return r;
+
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size = min(byte_count, max_bytes);
+
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
+ cur_size);
+
+ dst_addr += cur_size;
+ byte_count -= cur_size;
+ }
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ *fence = amdgpu_job_submit(job);
+ return 0;
+}
+
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ bool delayed)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_res_cursor dst;
+ int r;
+
+ if (!adev->mman.buffer_funcs_enabled) {
+ DRM_ERROR("Trying to clear memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (dst.remaining) {
+ struct dma_fence *next;
+ uint64_t cur_size, to;
+
+ /* Never fill more than 256MiB at once to avoid timeouts */
+ cur_size = min(dst.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ 1, ring, false, &cur_size, &to);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
+ &next, true, delayed);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
+
+ amdgpu_res_next(&dst, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_ttm_evict_resources - evict memory buffers
+ * @adev: amdgpu device object
+ * @mem_type: evicted BO's memory type
+ *
+ * Evicts all @mem_type buffers on the lru list of the memory type.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
+{
+ struct ttm_resource_manager *man;
+
+ switch (mem_type) {
+ case TTM_PL_VRAM:
+ case TTM_PL_TT:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_OA:
+ man = ttm_manager_type(&adev->mman.bdev, mem_type);
+ break;
+ default:
+ DRM_ERROR("Trying to evict invalid memory type\n");
+ return -EINVAL;
+ }
+
+ return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+
+ return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
+
+/*
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
+static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return -ENXIO;
+
+ size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
+ while (size) {
+ size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
+ uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
+
+ amdgpu_device_vram_access(adev, *pos, value, bytes, false);
+ if (copy_to_user(buf, value, bytes))
+ return -EFAULT;
+
+ result += bytes;
+ buf += bytes;
+ *pos += bytes;
+ size -= bytes;
+ }
+
+ return result;
+}
+
+/*
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
+static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return -ENXIO;
+
+ while (size) {
+ uint32_t value;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return result;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+
+ amdgpu_device_mm_access(adev, *pos, &value, 4, true);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ return result;
+}
+
+static const struct file_operations amdgpu_ttm_vram_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ttm_vram_read,
+ .write = amdgpu_ttm_vram_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
+
+ /* retrieve the IOMMU domain if any for this device */
+ dom = iommu_get_domain_for_dev(adev->dev);
+
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = min(bytes, size);
+
+ /* Translate the bus address to a physical address. If
+ * the domain is NULL it means there is no IOMMU active
+ * and the address translation is the identity
+ */
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap_local_page(p);
+ r = copy_to_user(buf, ptr + off, bytes);
+ kunmap_local(ptr);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
+}
+
+/*
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
+static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
+
+ dom = iommu_get_domain_for_dev(adev->dev);
+
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = min(bytes, size);
+
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap_local_page(p);
+ r = copy_from_user(ptr + off, buf, bytes);
+ kunmap_local(ptr);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
+}
+
+static const struct file_operations amdgpu_ttm_iomem_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_iomem_read,
+ .write = amdgpu_iomem_write,
+ .llseek = default_llseek
+};
+
+#endif
+
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
+ &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
+ debugfs_create_file("amdgpu_iomem", 0444, root, adev,
+ &amdgpu_ttm_iomem_fops);
+ debugfs_create_file("ttm_page_pool", 0444, root, adev,
+ &amdgpu_ttm_page_pool_fops);
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_VRAM),
+ root, "amdgpu_vram_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_TT),
+ root, "amdgpu_gtt_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GDS),
+ root, "amdgpu_gds_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GWS),
+ root, "amdgpu_gws_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_OA),
+ root, "amdgpu_oa_mm");
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
new file mode 100644
index 000000000..65ec82141
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_TTM_H__
+#define __AMDGPU_TTM_H__
+
+#include <linux/dma-direction.h>
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu.h"
+
+#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
+#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
+#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
+#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+
+#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
+
+#define AMDGPU_POISON 0xd0bed0be
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+
+struct hmm_range;
+
+struct amdgpu_gtt_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_mm mm;
+ spinlock_t lock;
+};
+
+struct amdgpu_mman {
+ struct ttm_device bdev;
+ struct ttm_pool *ttm_pools;
+ bool initialized;
+ void __iomem *aper_base_kaddr;
+
+ /* buffer handling */
+ const struct amdgpu_buffer_funcs *buffer_funcs;
+ struct amdgpu_ring *buffer_funcs_ring;
+ bool buffer_funcs_enabled;
+
+ struct mutex gtt_window_lock;
+ /* High priority scheduler entity for buffer moves */
+ struct drm_sched_entity high_pr;
+ /* Low priority scheduler entity for VRAM clearing */
+ struct drm_sched_entity low_pr;
+
+ struct amdgpu_vram_mgr vram_mgr;
+ struct amdgpu_gtt_mgr gtt_mgr;
+ struct ttm_resource_manager preempt_mgr;
+
+ uint64_t stolen_vga_size;
+ struct amdgpu_bo *stolen_vga_memory;
+ uint64_t stolen_extended_size;
+ struct amdgpu_bo *stolen_extended_memory;
+ bool keep_stolen_vga_memory;
+
+ struct amdgpu_bo *stolen_reserved_memory;
+ uint64_t stolen_reserved_offset;
+ uint64_t stolen_reserved_size;
+
+ /* discovery */
+ uint8_t *discovery_bin;
+ uint32_t discovery_tmr_size;
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;
+
+ /* firmware VRAM reservation */
+ u64 fw_vram_usage_start_offset;
+ u64 fw_vram_usage_size;
+ struct amdgpu_bo *fw_vram_usage_reserved_bo;
+ void *fw_vram_usage_va;
+
+ /* driver VRAM reservation */
+ u64 drv_vram_usage_start_offset;
+ u64 drv_vram_usage_size;
+ struct amdgpu_bo *drv_vram_usage_reserved_bo;
+ void *drv_vram_usage_va;
+
+ /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
+ struct amdgpu_bo *sdma_access_bo;
+ void *sdma_access_ptr;
+};
+
+struct amdgpu_copy_mem {
+ struct ttm_buffer_object *bo;
+ struct ttm_resource *mem;
+ unsigned long offset;
+};
+
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
+
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+
+uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
+
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_resource *mem,
+ u64 offset, u64 size,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt);
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt);
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr);
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
+ uint64_t start, uint64_t size);
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
+ uint64_t start);
+
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+ bool enable);
+
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ uint64_t dst_offset, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence, bool direct_submit,
+ bool vm_needs_flush, bool tmz);
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f);
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool delayed);
+
+int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+ struct hmm_range **range);
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+ struct hmm_range *range);
+#else
+static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct page **pages,
+ struct hmm_range **range)
+{
+ return -EPERM;
+}
+static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+}
+static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+ struct hmm_range *range)
+{
+ return false;
+}
+#endif
+
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
+int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
+ uint64_t addr, uint32_t flags);
+bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+ unsigned long end, unsigned long *userptr);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem);
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem);
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
+
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
new file mode 100644
index 000000000..8beefc045
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -0,0 +1,1345 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+
+static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
+{
+ DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
+ DRM_DEBUG("header_size_bytes: %u\n", le32_to_cpu(hdr->header_size_bytes));
+ DRM_DEBUG("header_version_major: %u\n", le16_to_cpu(hdr->header_version_major));
+ DRM_DEBUG("header_version_minor: %u\n", le16_to_cpu(hdr->header_version_minor));
+ DRM_DEBUG("ip_version_major: %u\n", le16_to_cpu(hdr->ip_version_major));
+ DRM_DEBUG("ip_version_minor: %u\n", le16_to_cpu(hdr->ip_version_minor));
+ DRM_DEBUG("ucode_version: 0x%08x\n", le32_to_cpu(hdr->ucode_version));
+ DRM_DEBUG("ucode_size_bytes: %u\n", le32_to_cpu(hdr->ucode_size_bytes));
+ DRM_DEBUG("ucode_array_offset_bytes: %u\n",
+ le32_to_cpu(hdr->ucode_array_offset_bytes));
+ DRM_DEBUG("crc32: 0x%08x\n", le32_to_cpu(hdr->crc32));
+}
+
+void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("MC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct mc_firmware_header_v1_0 *mc_hdr =
+ container_of(hdr, struct mc_firmware_header_v1_0, header);
+
+ DRM_DEBUG("io_debug_size_bytes: %u\n",
+ le32_to_cpu(mc_hdr->io_debug_size_bytes));
+ DRM_DEBUG("io_debug_array_offset_bytes: %u\n",
+ le32_to_cpu(mc_hdr->io_debug_array_offset_bytes));
+ } else {
+ DRM_ERROR("Unknown MC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ const struct smc_firmware_header_v1_0 *v1_0_hdr;
+ const struct smc_firmware_header_v2_0 *v2_0_hdr;
+ const struct smc_firmware_header_v2_1 *v2_1_hdr;
+
+ DRM_DEBUG("SMC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, header);
+ DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(v1_0_hdr->ucode_start_addr));
+ } else if (version_major == 2) {
+ switch (version_minor) {
+ case 0:
+ v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header);
+ DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes));
+ DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes));
+ break;
+ case 1:
+ v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header);
+ DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count));
+ DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset));
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("GFX\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct gfx_firmware_header_v1_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(gfx_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(gfx_hdr->jt_size));
+ } else if (version_major == 2) {
+ const struct gfx_firmware_header_v2_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
+ } else {
+ DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("RLC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct rlc_firmware_header_v1_0 *rlc_hdr =
+ container_of(hdr, struct rlc_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("master_pkt_description_offset: %u\n",
+ le32_to_cpu(rlc_hdr->master_pkt_description_offset));
+ } else if (version_major == 2) {
+ const struct rlc_firmware_header_v2_0 *rlc_hdr =
+ container_of(hdr, struct rlc_firmware_header_v2_0, header);
+ const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 =
+ container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1);
+ const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
+ const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
+ container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
+
+ switch (version_minor) {
+ case 0:
+ /* rlc_hdr v2_0 */
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("reg_restore_list_size: %u\n",
+ le32_to_cpu(rlc_hdr->reg_restore_list_size));
+ DRM_DEBUG("reg_list_format_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_start));
+ DRM_DEBUG("reg_list_format_separate_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
+ DRM_DEBUG("starting_offsets_start: %u\n",
+ le32_to_cpu(rlc_hdr->starting_offsets_start));
+ DRM_DEBUG("reg_list_format_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
+ DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ DRM_DEBUG("reg_list_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes));
+ DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
+ DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
+ DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ break;
+ case 1:
+ /* rlc_hdr v2_1 */
+ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length));
+ DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver));
+ DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver));
+ DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes));
+ DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes));
+ DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver));
+ DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver));
+ DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes));
+ DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes));
+ DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver));
+ DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver));
+ DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes));
+ DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes));
+ break;
+ case 2:
+ /* rlc_hdr v2_2 */
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes));
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes));
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes));
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes));
+ break;
+ case 3:
+ /* rlc_hdr v2_3 */
+ DRM_DEBUG("rlcp_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version));
+ DRM_DEBUG("rlcp_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version));
+ DRM_DEBUG("rlcp_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes));
+ DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes));
+ DRM_DEBUG("rlcv_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version));
+ DRM_DEBUG("rlcv_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version));
+ DRM_DEBUG("rlcv_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes));
+ DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes));
+ break;
+ case 4:
+ /* rlc_hdr v2_4 */
+ DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
+ break;
+ default:
+ DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
+ break;
+ }
+ } else {
+ DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("SDMA\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct sdma_firmware_header_v1_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ucode_change_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_change_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(sdma_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(sdma_hdr->jt_size));
+ if (version_minor >= 1) {
+ const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr =
+ container_of(sdma_hdr, struct sdma_firmware_header_v1_1, v1_0);
+ DRM_DEBUG("digest_size: %u\n", le32_to_cpu(sdma_v1_1_hdr->digest_size));
+ }
+ } else if (version_major == 2) {
+ const struct sdma_firmware_header_v2_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ctx_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_offset));
+ DRM_DEBUG("ctx_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_size));
+ DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
+ DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
+ } else {
+ DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
+ version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ uint32_t fw_index;
+ const struct psp_fw_bin_desc *desc;
+
+ DRM_DEBUG("PSP\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct psp_firmware_header_v1_0 *psp_hdr =
+ container_of(hdr, struct psp_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(psp_hdr->sos.fw_version));
+ DRM_DEBUG("sos_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos.offset_bytes));
+ DRM_DEBUG("sos_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos.size_bytes));
+ if (version_minor == 1) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.fw_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.size_bytes));
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.size_bytes));
+ }
+ if (version_minor == 2) {
+ const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0);
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.size_bytes));
+ }
+ if (version_minor == 3) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 =
+ container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.fw_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.size_bytes));
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.size_bytes));
+ DRM_DEBUG("spl_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.fw_version));
+ DRM_DEBUG("spl_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.offset_bytes));
+ DRM_DEBUG("spl_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.size_bytes));
+ }
+ } else if (version_major == 2) {
+ const struct psp_firmware_header_v2_0 *psp_hdr_v2_0 =
+ container_of(hdr, struct psp_firmware_header_v2_0, header);
+ for (fw_index = 0; fw_index < le32_to_cpu(psp_hdr_v2_0->psp_fw_bin_count); fw_index++) {
+ desc = &(psp_hdr_v2_0->psp_fw_bin[fw_index]);
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ DRM_DEBUG("psp_sos_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sos_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ DRM_DEBUG("psp_sys_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sys_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ DRM_DEBUG("psp_kdb_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_kdb_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ DRM_DEBUG("psp_toc_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_toc_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ DRM_DEBUG("psp_spl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_spl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ DRM_DEBUG("psp_rl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_rl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ DRM_DEBUG("psp_soc_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_soc_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ DRM_DEBUG("psp_intf_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_intf_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ DRM_DEBUG("psp_dbg_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_dbg_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ DRM_DEBUG("psp_ras_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_ras_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ default:
+ DRM_DEBUG("Unsupported PSP fw type: %d\n", desc->fw_type);
+ break;
+ }
+ }
+ } else {
+ DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
+ version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("GPU_INFO\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr =
+ container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
+
+ DRM_DEBUG("version_major: %u\n",
+ le16_to_cpu(gpu_info_hdr->version_major));
+ DRM_DEBUG("version_minor: %u\n",
+ le16_to_cpu(gpu_info_hdr->version_minor));
+ } else {
+ DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+static int amdgpu_ucode_validate(const struct firmware *fw)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)fw->data;
+
+ if (fw->size == le32_to_cpu(hdr->size_bytes))
+ return 0;
+
+ return -EINVAL;
+}
+
+bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
+ uint16_t hdr_major, uint16_t hdr_minor)
+{
+ if ((hdr->common.header_version_major == hdr_major) &&
+ (hdr->common.header_version_minor == hdr_minor))
+ return true;
+ return false;
+}
+
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_HAWAII:
+ case CHIP_MULLINS:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ return AMDGPU_FW_LOAD_SMU;
+ case CHIP_CYAN_SKILLFISH:
+ if (!(load_type &&
+ adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ default:
+ if (!load_type)
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ }
+}
+
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
+{
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ return "SDMA0";
+ case AMDGPU_UCODE_ID_SDMA1:
+ return "SDMA1";
+ case AMDGPU_UCODE_ID_SDMA2:
+ return "SDMA2";
+ case AMDGPU_UCODE_ID_SDMA3:
+ return "SDMA3";
+ case AMDGPU_UCODE_ID_SDMA4:
+ return "SDMA4";
+ case AMDGPU_UCODE_ID_SDMA5:
+ return "SDMA5";
+ case AMDGPU_UCODE_ID_SDMA6:
+ return "SDMA6";
+ case AMDGPU_UCODE_ID_SDMA7:
+ return "SDMA7";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ return "SDMA_CTX";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ return "SDMA_CTL";
+ case AMDGPU_UCODE_ID_CP_CE:
+ return "CP_CE";
+ case AMDGPU_UCODE_ID_CP_PFP:
+ return "CP_PFP";
+ case AMDGPU_UCODE_ID_CP_ME:
+ return "CP_ME";
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ return "CP_MEC1";
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ return "CP_MEC1_JT";
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ return "CP_MEC2";
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ return "CP_MEC2_JT";
+ case AMDGPU_UCODE_ID_CP_MES:
+ return "CP_MES";
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ return "CP_MES_DATA";
+ case AMDGPU_UCODE_ID_CP_MES1:
+ return "CP_MES_KIQ";
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ return "CP_MES_KIQ_DATA";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ return "RLC_RESTORE_LIST_CNTL";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ return "RLC_RESTORE_LIST_GPM_MEM";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ return "RLC_RESTORE_LIST_SRM_MEM";
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ return "RLC_IRAM";
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ return "RLC_DRAM";
+ case AMDGPU_UCODE_ID_RLC_G:
+ return "RLC_G";
+ case AMDGPU_UCODE_ID_RLC_P:
+ return "RLC_P";
+ case AMDGPU_UCODE_ID_RLC_V:
+ return "RLC_V";
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ return "GLOBAL_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ return "SE0_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ return "SE1_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ return "SE2_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ return "SE3_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_IMU_I:
+ return "IMU_I";
+ case AMDGPU_UCODE_ID_IMU_D:
+ return "IMU_D";
+ case AMDGPU_UCODE_ID_STORAGE:
+ return "STORAGE";
+ case AMDGPU_UCODE_ID_SMC:
+ return "SMC";
+ case AMDGPU_UCODE_ID_PPTABLE:
+ return "PPTABLE";
+ case AMDGPU_UCODE_ID_UVD:
+ return "UVD";
+ case AMDGPU_UCODE_ID_UVD1:
+ return "UVD1";
+ case AMDGPU_UCODE_ID_VCE:
+ return "VCE";
+ case AMDGPU_UCODE_ID_VCN:
+ return "VCN";
+ case AMDGPU_UCODE_ID_VCN1:
+ return "VCN1";
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ return "DMCU_ERAM";
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ return "DMCU_INTV";
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ return "VCN0_RAM";
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ return "VCN1_RAM";
+ case AMDGPU_UCODE_ID_DMCUB:
+ return "DMCUB";
+ case AMDGPU_UCODE_ID_CAP:
+ return "CAP";
+ default:
+ return "UNKNOWN UCODE";
+ }
+}
+
+#define FW_VERSION_ATTR(name, mode, field) \
+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct drm_device *ddev = dev_get_drvdata(dev); \
+ struct amdgpu_device *adev = drm_to_adev(ddev); \
+ \
+ return sysfs_emit(buf, "0x%08x\n", adev->field); \
+} \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi_context.context.bin_desc.fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+
+static struct attribute *fw_attrs[] = {
+ &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
+ &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
+ &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
+ &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
+ &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
+ &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
+ &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
+ &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
+ &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
+ &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr,
+ &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr,
+ NULL
+};
+
+static const struct attribute_group fw_attr_group = {
+ .name = "fw_version",
+ .attrs = fw_attrs
+};
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+ return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+ sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t mc_addr, void *kptr)
+{
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr = NULL;
+ const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
+ const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
+ const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
+ const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
+ u8 *ucode_addr;
+
+ if (!ucode->fw)
+ return 0;
+
+ ucode->mc_addr = mc_addr;
+ ucode->kaddr = kptr;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_STORAGE)
+ return 0;
+
+ header = (const struct common_firmware_header *)ucode->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)ucode->fw->data;
+ dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
+ dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(cp_hdr->jt_offset) * 4;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_cntl;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_gpm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_srm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_iram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_iram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcp_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcv_ucode;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(dmcu_hdr->intv_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(dmcu_hdr->intv_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ default:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ }
+ } else {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ }
+
+ memcpy(ucode->kaddr, ucode_addr, ucode->ucode_size);
+
+ return 0;
+}
+
+static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
+ uint64_t mc_addr, void *kptr)
+{
+ const struct gfx_firmware_header_v1_0 *header = NULL;
+ const struct common_firmware_header *comm_hdr = NULL;
+ uint8_t *src_addr = NULL;
+ uint8_t *dst_addr = NULL;
+
+ if (!ucode->fw)
+ return 0;
+
+ comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
+ header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ dst_addr = ucode->kaddr +
+ ALIGN(le32_to_cpu(comm_hdr->ucode_size_bytes),
+ PAGE_SIZE);
+ src_addr = (uint8_t *)ucode->fw->data +
+ le32_to_cpu(comm_hdr->ucode_array_offset_bytes) +
+ (le32_to_cpu(header->jt_offset) * 4);
+ memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
+
+ return 0;
+}
+
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
+{
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+ amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
+ amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+ if (!adev->firmware.fw_buf) {
+ dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
+ return -ENOMEM;
+ } else if (amdgpu_sriov_vf(adev)) {
+ memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
+ }
+ }
+ return 0;
+}
+
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+}
+
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+{
+ uint64_t fw_offset = 0;
+ int i;
+ struct amdgpu_firmware_info *ucode = NULL;
+
+ /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_in_reset(adev) || adev->in_suspend))
+ return 0;
+ /*
+ * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
+ * ucode info here
+ */
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ if (amdgpu_sriov_vf(adev))
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3;
+ else
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
+ } else {
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
+ }
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (ucode->fw) {
+ amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
+ adev->firmware.fw_buf_ptr + fw_offset);
+ if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
+ adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
+ adev->firmware.fw_buf_ptr + fw_offset);
+ fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+ }
+ fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
+ }
+ }
+ return 0;
+}
+
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type)
+{
+ if (block_type == MP0_HWIP) {
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "vega10";
+ case CHIP_VEGA12:
+ return "vega12";
+ default:
+ return NULL;
+ }
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ return "navi10";
+ case IP_VERSION(11, 0, 2):
+ return "vega20";
+ case IP_VERSION(11, 0, 3):
+ return "renoir";
+ case IP_VERSION(11, 0, 4):
+ return "arcturus";
+ case IP_VERSION(11, 0, 5):
+ return "navi14";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid";
+ case IP_VERSION(11, 0, 9):
+ return "navi12";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby";
+ case IP_VERSION(11, 5, 0):
+ return "vangogh";
+ case IP_VERSION(12, 0, 1):
+ return "green_sardine";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran";
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ return "yellow_carp";
+ }
+ } else if (block_type == MP1_HWIP) {
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return "arcturus_smc";
+ return NULL;
+ case IP_VERSION(11, 0, 0):
+ return "navi10_smc";
+ case IP_VERSION(11, 0, 5):
+ return "navi14_smc";
+ case IP_VERSION(11, 0, 9):
+ return "navi12_smc";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid_smc";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder_smc";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish_smc";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby_smc";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran_smc";
+ }
+ } else if (block_type == SDMA0_HWIP) {
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ return "vega10_sdma";
+ case IP_VERSION(4, 0, 1):
+ return "vega12_sdma";
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_sdma";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_sdma";
+ return "raven_sdma";
+ case IP_VERSION(4, 1, 2):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_sdma";
+ return "green_sardine_sdma";
+ case IP_VERSION(4, 2, 0):
+ return "vega20_sdma";
+ case IP_VERSION(4, 2, 2):
+ return "arcturus_sdma";
+ case IP_VERSION(4, 4, 0):
+ return "aldebaran_sdma";
+ case IP_VERSION(5, 0, 0):
+ return "navi10_sdma";
+ case IP_VERSION(5, 0, 1):
+ return "cyan_skillfish2_sdma";
+ case IP_VERSION(5, 0, 2):
+ return "navi14_sdma";
+ case IP_VERSION(5, 0, 5):
+ return "navi12_sdma";
+ case IP_VERSION(5, 2, 0):
+ return "sienna_cichlid_sdma";
+ case IP_VERSION(5, 2, 2):
+ return "navy_flounder_sdma";
+ case IP_VERSION(5, 2, 4):
+ return "dimgrey_cavefish_sdma";
+ case IP_VERSION(5, 2, 5):
+ return "beige_goby_sdma";
+ case IP_VERSION(5, 2, 3):
+ return "yellow_carp_sdma";
+ case IP_VERSION(5, 2, 1):
+ return "vangogh_sdma";
+ }
+ } else if (block_type == UVD_HWIP) {
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_vcn";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_vcn";
+ return "raven_vcn";
+ case IP_VERSION(2, 5, 0):
+ return "arcturus_vcn";
+ case IP_VERSION(2, 2, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_vcn";
+ return "green_sardine_vcn";
+ case IP_VERSION(2, 6, 0):
+ return "aldebaran_vcn";
+ case IP_VERSION(2, 0, 0):
+ return "navi10_vcn";
+ case IP_VERSION(2, 0, 2):
+ if (adev->asic_type == CHIP_NAVI12)
+ return "navi12_vcn";
+ return "navi14_vcn";
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ return "sienna_cichlid_vcn";
+ return "navy_flounder_vcn";
+ case IP_VERSION(3, 0, 2):
+ return "vangogh_vcn";
+ case IP_VERSION(3, 0, 16):
+ return "dimgrey_cavefish_vcn";
+ case IP_VERSION(3, 0, 33):
+ return "beige_goby_vcn";
+ case IP_VERSION(3, 1, 1):
+ return "yellow_carp_vcn";
+ }
+ } else if (block_type == GC_HWIP) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ return "vega10";
+ case IP_VERSION(9, 2, 1):
+ return "vega12";
+ case IP_VERSION(9, 4, 0):
+ return "vega20";
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ case IP_VERSION(9, 4, 1):
+ return "arcturus";
+ case IP_VERSION(9, 3, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir";
+ return "green_sardine";
+ case IP_VERSION(9, 4, 2):
+ return "aldebaran";
+ case IP_VERSION(10, 1, 10):
+ return "navi10";
+ case IP_VERSION(10, 1, 1):
+ return "navi14";
+ case IP_VERSION(10, 1, 2):
+ return "navi12";
+ case IP_VERSION(10, 3, 0):
+ return "sienna_cichlid";
+ case IP_VERSION(10, 3, 2):
+ return "navy_flounder";
+ case IP_VERSION(10, 3, 1):
+ return "vangogh";
+ case IP_VERSION(10, 3, 4):
+ return "dimgrey_cavefish";
+ case IP_VERSION(10, 3, 5):
+ return "beige_goby";
+ case IP_VERSION(10, 3, 3):
+ return "yellow_carp";
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ return "cyan_skillfish2";
+ }
+ }
+ return NULL;
+}
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
+{
+ int maj, min, rev;
+ char *ip_name;
+ const char *legacy;
+ uint32_t version = adev->ip_versions[block_type][0];
+
+ legacy = amdgpu_ucode_legacy_naming(adev, block_type);
+ if (legacy) {
+ snprintf(ucode_prefix, len, "%s", legacy);
+ return;
+ }
+
+ switch (block_type) {
+ case GC_HWIP:
+ ip_name = "gc";
+ break;
+ case SDMA0_HWIP:
+ ip_name = "sdma";
+ break;
+ case MP0_HWIP:
+ ip_name = "psp";
+ break;
+ case MP1_HWIP:
+ ip_name = "smu";
+ break;
+ case UVD_HWIP:
+ ip_name = "vcn";
+ break;
+ default:
+ BUG();
+ }
+
+ maj = IP_VERSION_MAJ(version);
+ min = IP_VERSION_MIN(version);
+ rev = IP_VERSION_REV(version);
+
+ snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
+}
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ const char *fw_name)
+{
+ int err = request_firmware(fw, fw_name, adev->dev);
+
+ if (err)
+ return -ENODEV;
+ err = amdgpu_ucode_validate(*fw);
+ if (err)
+ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+ return err;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+ release_firmware(*fw);
+ *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
new file mode 100644
index 000000000..b03321e7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -0,0 +1,566 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_UCODE_H__
+#define __AMDGPU_UCODE_H__
+
+#include "amdgpu_socbb.h"
+
+struct common_firmware_header {
+ uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
+ uint32_t header_size_bytes; /* size of just the header in bytes */
+ uint16_t header_version_major; /* header version */
+ uint16_t header_version_minor; /* header version */
+ uint16_t ip_version_major; /* IP version */
+ uint16_t ip_version_minor; /* IP version */
+ uint32_t ucode_version;
+ uint32_t ucode_size_bytes; /* size of ucode in bytes */
+ uint32_t ucode_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t crc32; /* crc32 checksum of the payload */
+};
+
+/* version_major=1, version_minor=0 */
+struct mc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t io_debug_size_bytes; /* size of debug array in dwords */
+ uint32_t io_debug_array_offset_bytes; /* payload offset from the start of the header */
+};
+
+/* version_major=1, version_minor=0 */
+struct smc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_start_addr;
+};
+
+/* version_major=2, version_minor=0 */
+struct smc_firmware_header_v2_0 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t ppt_offset_bytes; /* soft pptable offset */
+ uint32_t ppt_size_bytes; /* soft pptable size */
+};
+
+struct smc_soft_pptable_entry {
+ uint32_t id;
+ uint32_t ppt_offset_bytes;
+ uint32_t ppt_size_bytes;
+};
+
+/* version_major=2, version_minor=1 */
+struct smc_firmware_header_v2_1 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t pptable_count;
+ uint32_t pptable_entry_offset;
+};
+
+struct psp_fw_legacy_bin_desc {
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
+/* version_major=1, version_minor=0 */
+struct psp_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ struct psp_fw_legacy_bin_desc sos;
+};
+
+/* version_major=1, version_minor=1 */
+struct psp_firmware_header_v1_1 {
+ struct psp_firmware_header_v1_0 v1_0;
+ struct psp_fw_legacy_bin_desc toc;
+ struct psp_fw_legacy_bin_desc kdb;
+};
+
+/* version_major=1, version_minor=2 */
+struct psp_firmware_header_v1_2 {
+ struct psp_firmware_header_v1_0 v1_0;
+ struct psp_fw_legacy_bin_desc res;
+ struct psp_fw_legacy_bin_desc kdb;
+};
+
+/* version_major=1, version_minor=3 */
+struct psp_firmware_header_v1_3 {
+ struct psp_firmware_header_v1_1 v1_1;
+ struct psp_fw_legacy_bin_desc spl;
+ struct psp_fw_legacy_bin_desc rl;
+ struct psp_fw_legacy_bin_desc sys_drv_aux;
+ struct psp_fw_legacy_bin_desc sos_aux;
+};
+
+struct psp_fw_bin_desc {
+ uint32_t fw_type;
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
+enum psp_fw_type {
+ PSP_FW_TYPE_UNKOWN,
+ PSP_FW_TYPE_PSP_SOS,
+ PSP_FW_TYPE_PSP_SYS_DRV,
+ PSP_FW_TYPE_PSP_KDB,
+ PSP_FW_TYPE_PSP_TOC,
+ PSP_FW_TYPE_PSP_SPL,
+ PSP_FW_TYPE_PSP_RL,
+ PSP_FW_TYPE_PSP_SOC_DRV,
+ PSP_FW_TYPE_PSP_INTF_DRV,
+ PSP_FW_TYPE_PSP_DBG_DRV,
+ PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_MAX_INDEX,
+};
+
+/* version_major=2, version_minor=0 */
+struct psp_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
+/* version_major=1, version_minor=0 */
+struct ta_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ struct psp_fw_legacy_bin_desc xgmi;
+ struct psp_fw_legacy_bin_desc ras;
+ struct psp_fw_legacy_bin_desc hdcp;
+ struct psp_fw_legacy_bin_desc dtm;
+ struct psp_fw_legacy_bin_desc securedisplay;
+};
+
+enum ta_fw_type {
+ TA_FW_TYPE_UNKOWN,
+ TA_FW_TYPE_PSP_ASD,
+ TA_FW_TYPE_PSP_XGMI,
+ TA_FW_TYPE_PSP_RAS,
+ TA_FW_TYPE_PSP_HDCP,
+ TA_FW_TYPE_PSP_DTM,
+ TA_FW_TYPE_PSP_RAP,
+ TA_FW_TYPE_PSP_SECUREDISPLAY,
+ TA_FW_TYPE_MAX_INDEX,
+};
+
+/* version_major=2, version_minor=0 */
+struct ta_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ta_fw_bin_count;
+ struct psp_fw_bin_desc ta_fw_bin[];
+};
+
+/* version_major=1, version_minor=0 */
+struct gfx_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+};
+
+/* version_major=2, version_minor=0 */
+struct gfx_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_size_bytes;
+ uint32_t ucode_offset_bytes;
+ uint32_t data_size_bytes;
+ uint32_t data_offset_bytes;
+ uint32_t ucode_start_addr_lo;
+ uint32_t ucode_start_addr_hi;
+};
+
+/* version_major=1, version_minor=0 */
+struct mes_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t mes_ucode_version;
+ uint32_t mes_ucode_size_bytes;
+ uint32_t mes_ucode_offset_bytes;
+ uint32_t mes_ucode_data_version;
+ uint32_t mes_ucode_data_size_bytes;
+ uint32_t mes_ucode_data_offset_bytes;
+ uint32_t mes_uc_start_addr_lo;
+ uint32_t mes_uc_start_addr_hi;
+ uint32_t mes_data_start_addr_lo;
+ uint32_t mes_data_start_addr_hi;
+};
+
+/* version_major=1, version_minor=0 */
+struct rlc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t save_and_restore_offset;
+ uint32_t clear_state_descriptor_offset;
+ uint32_t avail_scratch_ram_locations;
+ uint32_t master_pkt_description_offset;
+};
+
+/* version_major=2, version_minor=0 */
+struct rlc_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+ uint32_t save_and_restore_offset;
+ uint32_t clear_state_descriptor_offset;
+ uint32_t avail_scratch_ram_locations;
+ uint32_t reg_restore_list_size;
+ uint32_t reg_list_format_start;
+ uint32_t reg_list_format_separate_start;
+ uint32_t starting_offsets_start;
+ uint32_t reg_list_format_size_bytes; /* size of reg list format array in bytes */
+ uint32_t reg_list_format_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_size_bytes; /* size of reg list array in bytes */
+ uint32_t reg_list_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_format_separate_size_bytes; /* size of reg list format array in bytes */
+ uint32_t reg_list_format_separate_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_separate_size_bytes; /* size of reg list array in bytes */
+ uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
+};
+
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_1 {
+ struct rlc_firmware_header_v2_0 v2_0;
+ uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+ uint32_t save_restore_list_cntl_ucode_ver;
+ uint32_t save_restore_list_cntl_feature_ver;
+ uint32_t save_restore_list_cntl_size_bytes;
+ uint32_t save_restore_list_cntl_offset_bytes;
+ uint32_t save_restore_list_gpm_ucode_ver;
+ uint32_t save_restore_list_gpm_feature_ver;
+ uint32_t save_restore_list_gpm_size_bytes;
+ uint32_t save_restore_list_gpm_offset_bytes;
+ uint32_t save_restore_list_srm_ucode_ver;
+ uint32_t save_restore_list_srm_feature_ver;
+ uint32_t save_restore_list_srm_size_bytes;
+ uint32_t save_restore_list_srm_offset_bytes;
+};
+
+/* version_major=2, version_minor=2 */
+struct rlc_firmware_header_v2_2 {
+ struct rlc_firmware_header_v2_1 v2_1;
+ uint32_t rlc_iram_ucode_size_bytes;
+ uint32_t rlc_iram_ucode_offset_bytes;
+ uint32_t rlc_dram_ucode_size_bytes;
+ uint32_t rlc_dram_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=3 */
+struct rlc_firmware_header_v2_3 {
+ struct rlc_firmware_header_v2_2 v2_2;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcp_ucode_size_bytes;
+ uint32_t rlcp_ucode_offset_bytes;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t rlcv_ucode_size_bytes;
+ uint32_t rlcv_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=4 */
+struct rlc_firmware_header_v2_4 {
+ struct rlc_firmware_header_v2_3 v2_3;
+ uint32_t global_tap_delays_ucode_size_bytes;
+ uint32_t global_tap_delays_ucode_offset_bytes;
+ uint32_t se0_tap_delays_ucode_size_bytes;
+ uint32_t se0_tap_delays_ucode_offset_bytes;
+ uint32_t se1_tap_delays_ucode_size_bytes;
+ uint32_t se1_tap_delays_ucode_offset_bytes;
+ uint32_t se2_tap_delays_ucode_size_bytes;
+ uint32_t se2_tap_delays_ucode_offset_bytes;
+ uint32_t se3_tap_delays_ucode_size_bytes;
+ uint32_t se3_tap_delays_ucode_offset_bytes;
+};
+
+/* version_major=1, version_minor=0 */
+struct sdma_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_change_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+};
+
+/* version_major=1, version_minor=1 */
+struct sdma_firmware_header_v1_1 {
+ struct sdma_firmware_header_v1_0 v1_0;
+ uint32_t digest_size;
+};
+
+/* version_major=2, version_minor=0 */
+struct sdma_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* gpu info payload */
+struct gpu_info_firmware_v1_0 {
+ uint32_t gc_num_se;
+ uint32_t gc_num_cu_per_sh;
+ uint32_t gc_num_sh_per_se;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_tccs;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+};
+
+struct gpu_info_firmware_v1_1 {
+ struct gpu_info_firmware_v1_0 v1_0;
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+};
+
+/* gpu info payload
+ * version_major=1, version_minor=1 */
+struct gpu_info_firmware_v1_2 {
+ struct gpu_info_firmware_v1_1 v1_1;
+ struct gpu_info_soc_bounding_box_v1_0 soc_bounding_box;
+};
+
+/* version_major=1, version_minor=0 */
+struct gpu_info_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint16_t version_major; /* version */
+ uint16_t version_minor; /* version */
+};
+
+/* version_major=1, version_minor=0 */
+struct dmcu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
+ uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
+};
+
+/* version_major=1, version_minor=0 */
+struct dmcub_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t inst_const_bytes; /* size of instruction region, in bytes */
+ uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
+};
+
+/* version_major=1, version_minor=0 */
+struct imu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t imu_iram_ucode_size_bytes;
+ uint32_t imu_iram_ucode_offset_bytes;
+ uint32_t imu_dram_ucode_size_bytes;
+ uint32_t imu_dram_ucode_offset_bytes;
+};
+
+/* header is fixed size */
+union amdgpu_firmware_header {
+ struct common_firmware_header common;
+ struct mc_firmware_header_v1_0 mc;
+ struct smc_firmware_header_v1_0 smc;
+ struct smc_firmware_header_v2_0 smc_v2_0;
+ struct psp_firmware_header_v1_0 psp;
+ struct psp_firmware_header_v1_1 psp_v1_1;
+ struct psp_firmware_header_v1_3 psp_v1_3;
+ struct psp_firmware_header_v2_0 psp_v2_0;
+ struct ta_firmware_header_v1_0 ta;
+ struct ta_firmware_header_v2_0 ta_v2_0;
+ struct gfx_firmware_header_v1_0 gfx;
+ struct gfx_firmware_header_v2_0 gfx_v2_0;
+ struct rlc_firmware_header_v1_0 rlc;
+ struct rlc_firmware_header_v2_0 rlc_v2_0;
+ struct rlc_firmware_header_v2_1 rlc_v2_1;
+ struct rlc_firmware_header_v2_2 rlc_v2_2;
+ struct rlc_firmware_header_v2_3 rlc_v2_3;
+ struct rlc_firmware_header_v2_4 rlc_v2_4;
+ struct sdma_firmware_header_v1_0 sdma;
+ struct sdma_firmware_header_v1_1 sdma_v1_1;
+ struct sdma_firmware_header_v2_0 sdma_v2_0;
+ struct gpu_info_firmware_header_v1_0 gpu_info;
+ struct dmcu_firmware_header_v1_0 dmcu;
+ struct dmcub_firmware_header_v1_0 dmcub;
+ struct imu_firmware_header_v1_0 imu;
+ uint8_t raw[0x100];
+};
+
+#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
+
+/*
+ * fw loading support
+ */
+enum AMDGPU_UCODE_ID {
+ AMDGPU_UCODE_ID_CAP = 0,
+ AMDGPU_UCODE_ID_SDMA0,
+ AMDGPU_UCODE_ID_SDMA1,
+ AMDGPU_UCODE_ID_SDMA2,
+ AMDGPU_UCODE_ID_SDMA3,
+ AMDGPU_UCODE_ID_SDMA4,
+ AMDGPU_UCODE_ID_SDMA5,
+ AMDGPU_UCODE_ID_SDMA6,
+ AMDGPU_UCODE_ID_SDMA7,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
+ AMDGPU_UCODE_ID_CP_CE,
+ AMDGPU_UCODE_ID_CP_PFP,
+ AMDGPU_UCODE_ID_CP_ME,
+ AMDGPU_UCODE_ID_CP_RS64_PFP,
+ AMDGPU_UCODE_ID_CP_RS64_ME,
+ AMDGPU_UCODE_ID_CP_RS64_MEC,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK,
+ AMDGPU_UCODE_ID_CP_MEC1,
+ AMDGPU_UCODE_ID_CP_MEC1_JT,
+ AMDGPU_UCODE_ID_CP_MEC2,
+ AMDGPU_UCODE_ID_CP_MEC2_JT,
+ AMDGPU_UCODE_ID_CP_MES,
+ AMDGPU_UCODE_ID_CP_MES_DATA,
+ AMDGPU_UCODE_ID_CP_MES1,
+ AMDGPU_UCODE_ID_CP_MES1_DATA,
+ AMDGPU_UCODE_ID_IMU_I,
+ AMDGPU_UCODE_ID_IMU_D,
+ AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+ AMDGPU_UCODE_ID_RLC_IRAM,
+ AMDGPU_UCODE_ID_RLC_DRAM,
+ AMDGPU_UCODE_ID_RLC_P,
+ AMDGPU_UCODE_ID_RLC_V,
+ AMDGPU_UCODE_ID_RLC_G,
+ AMDGPU_UCODE_ID_STORAGE,
+ AMDGPU_UCODE_ID_SMC,
+ AMDGPU_UCODE_ID_PPTABLE,
+ AMDGPU_UCODE_ID_UVD,
+ AMDGPU_UCODE_ID_UVD1,
+ AMDGPU_UCODE_ID_VCE,
+ AMDGPU_UCODE_ID_VCN,
+ AMDGPU_UCODE_ID_VCN1,
+ AMDGPU_UCODE_ID_DMCU_ERAM,
+ AMDGPU_UCODE_ID_DMCU_INTV,
+ AMDGPU_UCODE_ID_VCN0_RAM,
+ AMDGPU_UCODE_ID_VCN1_RAM,
+ AMDGPU_UCODE_ID_DMCUB,
+ AMDGPU_UCODE_ID_MAXIMUM,
+};
+
+/* engine firmware status */
+enum AMDGPU_UCODE_STATUS {
+ AMDGPU_UCODE_STATUS_INVALID,
+ AMDGPU_UCODE_STATUS_NOT_LOADED,
+ AMDGPU_UCODE_STATUS_LOADED,
+};
+
+enum amdgpu_firmware_load_type {
+ AMDGPU_FW_LOAD_DIRECT = 0,
+ AMDGPU_FW_LOAD_PSP,
+ AMDGPU_FW_LOAD_SMU,
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
+};
+
+/* conform to smu_ucode_xfer_cz.h */
+#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
+#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
+#define AMDGPU_CPCE_UCODE_LOADED 0x00000004
+#define AMDGPU_CPPFP_UCODE_LOADED 0x00000008
+#define AMDGPU_CPME_UCODE_LOADED 0x00000010
+#define AMDGPU_CPMEC1_UCODE_LOADED 0x00000020
+#define AMDGPU_CPMEC2_UCODE_LOADED 0x00000040
+#define AMDGPU_CPRLC_UCODE_LOADED 0x00000100
+
+/* amdgpu firmware info */
+struct amdgpu_firmware_info {
+ /* ucode ID */
+ enum AMDGPU_UCODE_ID ucode_id;
+ /* request_firmware */
+ const struct firmware *fw;
+ /* starting mc address */
+ uint64_t mc_addr;
+ /* kernel linear address */
+ void *kaddr;
+ /* ucode_size_bytes */
+ uint32_t ucode_size;
+ /* starting tmr mc address */
+ uint32_t tmr_mc_addr_lo;
+ uint32_t tmr_mc_addr_hi;
+};
+
+struct amdgpu_firmware {
+ struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
+ enum amdgpu_firmware_load_type load_type;
+ struct amdgpu_bo *fw_buf;
+ unsigned int fw_size;
+ unsigned int max_ucodes;
+ /* firmwares are loaded by psp instead of smu from vega10 */
+ const struct amdgpu_psp_funcs *funcs;
+ struct amdgpu_bo *rbuf;
+ struct mutex mutex;
+
+ /* gpu info firmware data pointer */
+ const struct firmware *gpu_info_fw;
+
+ void *fw_buf_ptr;
+ uint64_t fw_buf_mc;
+};
+
+void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ const char *fw_name);
+void amdgpu_ucode_release(const struct firmware **fw);
+bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
+ uint16_t hdr_major, uint16_t hdr_minor);
+
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
+
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
+
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
new file mode 100644
index 000000000..db0d94ca4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "umc_v6_7.h"
+
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(6, 7, 0):
+ umc_v6_7_convert_error_address(adev,
+ err_data, err_addr, ch_inst, umc_inst);
+ break;
+ default:
+ dev_warn(adev->dev,
+ "UMC address to Physical address translation is not supported\n");
+ return AMDGPU_RAS_FAIL;
+ }
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ int ret = AMDGPU_RAS_FAIL;
+
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error record in MCA notifier!\n");
+ return AMDGPU_RAS_FAIL;
+ }
+
+ /*
+ * Translate UMC channel address to Physical address
+ */
+ ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+ ch_inst, umc_inst);
+ if (ret)
+ goto out;
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt);
+ amdgpu_ras_save_bad_pages(adev, NULL);
+ }
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
+
+static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry,
+ bool reset)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
+
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if(!err_data->err_addr)
+ dev_warn(adev->dev, "Failed to alloc memory for "
+ "umc error address record!\n");
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
+ }
+ } else if (!ret) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if(!err_data->err_addr)
+ dev_warn(adev->dev, "Failed to alloc memory for "
+ "umc error address record!\n");
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
+ }
+ }
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count) {
+ dev_info(adev->dev, "%ld uncorrectable hardware errors "
+ "detected in UMC block\n",
+ err_data->ue_count);
+
+ if ((amdgpu_bad_page_threshold != 0) &&
+ err_data->err_addr_cnt) {
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt);
+ amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+ }
+
+ if (reset)
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ kfree(err_data->err_addr);
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
+{
+ int ret = AMDGPU_RAS_SUCCESS;
+
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ if (reset) {
+ /* MCA poison handler is only responsible for GPU reset,
+ * let MCA notifier do page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return ret;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
+ }
+
+ return ret;
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
+}
+
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_umc_ras *ras;
+
+ if (!adev->umc.ras)
+ return 0;
+
+ ras = adev->umc.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register umc ras block!\n");
+ return err;
+ }
+
+ strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ /* ras init of specific umc version */
+ if (adev->umc.ras &&
+ adev->umc.ras->err_cnt_init)
+ adev->umc.ras->err_cnt_init(adev);
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst)
+{
+ struct eeprom_table_record *err_rec =
+ &err_data->err_addr[err_data->err_addr_cnt];
+
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+}
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data)
+{
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ int ret = 0;
+
+ if (adev->umc.node_inst_num) {
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ } else {
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ ret = func(adev, 0, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
+ umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
new file mode 100644
index 000000000..43321f57f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_UMC_H__
+#define __AMDGPU_UMC_H__
+#include "amdgpu_ras.h"
+
+/*
+ * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
+ * is the index of 4KB block
+ */
+#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4)
+/*
+ * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/* channel index is the index of 256B block */
+#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
+/* offset in 256B block */
+#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
+
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
+#define LOOP_UMC_NODE_INST(node_inst) \
+ for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num)
+
+#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
+ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+
+
+typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
+ uint32_t umc_inst, uint32_t ch_inst, void *data);
+
+struct amdgpu_umc_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*err_cnt_init)(struct amdgpu_device *adev);
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ /* support different eeprom table version for different asic */
+ void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
+};
+
+struct amdgpu_umc_funcs {
+ void (*init_registers)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_umc {
+ /* max error count in one ras query call */
+ uint32_t max_ras_err_cnt_per_query;
+ /* number of umc channel instance with memory map register access */
+ uint32_t channel_inst_num;
+ /* number of umc instance with memory map register access */
+ uint32_t umc_inst_num;
+
+ /* Total number of umc node instance including harvest one */
+ uint32_t node_inst_num;
+
+ /* UMC regiser per channel offset */
+ uint32_t channel_offs;
+ /* how many pages are retired in one UE */
+ uint32_t retire_unit;
+ /* channel index table of interleaved memory */
+ const uint32_t *channel_idx_tbl;
+ struct ras_common_if *ras_if;
+
+ const struct amdgpu_umc_funcs *funcs;
+ struct amdgpu_umc_ras *ras;
+
+ /* active mask for umc node instance */
+ unsigned long active_mask;
+};
+
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst);
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
new file mode 100644
index 000000000..107f9bb0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/ioctl.h>
+
+/*
+ * MMIO debugfs IOCTL structure
+ */
+struct amdgpu_debugfs_regs2_iocdata {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+};
+
+struct amdgpu_debugfs_regs2_iocdata_v2 {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+ u32 xcc_id;
+};
+
+struct amdgpu_debugfs_gprwave_iocdata {
+ u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
+ struct {
+ u32 thread, vpgr_or_sgpr;
+ } gpr;
+};
+
+/*
+ * MMIO debugfs state data (per file* handle)
+ */
+struct amdgpu_debugfs_regs2_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_regs2_iocdata_v2 id;
+};
+
+struct amdgpu_debugfs_gprwave_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_gprwave_iocdata id;
+};
+
+enum AMDGPU_DEBUGFS_REGS2_CMDS {
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
+};
+
+enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
+};
+
+//reg2 interface
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
+
+//gprwave interface
+#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
new file mode 100644
index 000000000..b7441654e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -0,0 +1,1376 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
+#include "cikd.h"
+#include "uvd/uvd_4_2_d.h"
+
+#include "amdgpu_ras.h"
+
+/* 1 second timeout */
+#define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/* Firmware versions for VI */
+#define FW_1_65_10 ((1 << 24) | (65 << 16) | (10 << 8))
+#define FW_1_87_11 ((1 << 24) | (87 << 16) | (11 << 8))
+#define FW_1_87_12 ((1 << 24) | (87 << 16) | (12 << 8))
+#define FW_1_37_15 ((1 << 24) | (37 << 16) | (15 << 8))
+
+/* Polaris10/11 firmware version */
+#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
+/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin"
+#define FIRMWARE_VERDE "amdgpu/verde_uvd.bin"
+#define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin"
+#define FIRMWARE_OLAND "amdgpu/oland_uvd.bin"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
+#endif
+#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
+#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
+#define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
+#define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin"
+#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
+
+#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
+
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
+#define UVD_GPCOM_VCPU_CMD 0x03c3
+#define UVD_GPCOM_VCPU_DATA0 0x03c4
+#define UVD_GPCOM_VCPU_DATA1 0x03c5
+#define UVD_NO_OP 0x03ff
+#define UVD_BASE_SI 0x3800
+
+/*
+ * amdgpu_uvd_cs_ctx - Command submission parser context
+ *
+ * Used for emulating virtual memory support on UVD 4.2.
+ */
+struct amdgpu_uvd_cs_ctx {
+ struct amdgpu_cs_parser *parser;
+ unsigned int reg, count;
+ unsigned int data0, data1;
+ unsigned int idx;
+ struct amdgpu_ib *ib;
+
+ /* does the IB has a msg command */
+ bool has_msg_cmd;
+
+ /* minimum buffer sizes */
+ unsigned int *buf_sizes;
+};
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+MODULE_FIRMWARE(FIRMWARE_VERDE);
+MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
+MODULE_FIRMWARE(FIRMWARE_OLAND);
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+MODULE_FIRMWARE(FIRMWARE_KABINI);
+MODULE_FIRMWARE(FIRMWARE_KAVERI);
+MODULE_FIRMWARE(FIRMWARE_HAWAII);
+MODULE_FIRMWARE(FIRMWARE_MULLINS);
+#endif
+MODULE_FIRMWARE(FIRMWARE_TONGA);
+MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+MODULE_FIRMWARE(FIRMWARE_FIJI);
+MODULE_FIRMWARE(FIRMWARE_STONEY);
+MODULE_FIRMWARE(FIRMWARE_POLARIS10);
+MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
+
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
+
+static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+
+static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
+ uint32_t size,
+ struct amdgpu_bo **bo_ptr)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo, NULL, &addr);
+ if (r)
+ return r;
+
+ if (adev->uvd.address_64_bit)
+ goto succ;
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_uvd_force_into_uvd_segment(bo);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ goto err;
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto err_pin;
+ r = amdgpu_bo_kmap(bo, &addr);
+ if (r)
+ goto err_kmap;
+succ:
+ amdgpu_bo_unreserve(bo);
+ *bo_ptr = bo;
+ return 0;
+err_kmap:
+ amdgpu_bo_unpin(bo);
+err_pin:
+err:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
+}
+
+int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+{
+ unsigned long bo_size;
+ const char *fw_name;
+ const struct common_firmware_header *hdr;
+ unsigned int family_id;
+ int i, j, r;
+
+ INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ fw_name = FIRMWARE_TAHITI;
+ break;
+ case CHIP_VERDE:
+ fw_name = FIRMWARE_VERDE;
+ break;
+ case CHIP_PITCAIRN:
+ fw_name = FIRMWARE_PITCAIRN;
+ break;
+ case CHIP_OLAND:
+ fw_name = FIRMWARE_OLAND;
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ fw_name = FIRMWARE_BONAIRE;
+ break;
+ case CHIP_KABINI:
+ fw_name = FIRMWARE_KABINI;
+ break;
+ case CHIP_KAVERI:
+ fw_name = FIRMWARE_KAVERI;
+ break;
+ case CHIP_HAWAII:
+ fw_name = FIRMWARE_HAWAII;
+ break;
+ case CHIP_MULLINS:
+ fw_name = FIRMWARE_MULLINS;
+ break;
+#endif
+ case CHIP_TONGA:
+ fw_name = FIRMWARE_TONGA;
+ break;
+ case CHIP_FIJI:
+ fw_name = FIRMWARE_FIJI;
+ break;
+ case CHIP_CARRIZO:
+ fw_name = FIRMWARE_CARRIZO;
+ break;
+ case CHIP_STONEY:
+ fw_name = FIRMWARE_STONEY;
+ break;
+ case CHIP_POLARIS10:
+ fw_name = FIRMWARE_POLARIS10;
+ break;
+ case CHIP_POLARIS11:
+ fw_name = FIRMWARE_POLARIS11;
+ break;
+ case CHIP_POLARIS12:
+ fw_name = FIRMWARE_POLARIS12;
+ break;
+ case CHIP_VEGA10:
+ fw_name = FIRMWARE_VEGA10;
+ break;
+ case CHIP_VEGA12:
+ fw_name = FIRMWARE_VEGA12;
+ break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->uvd.fw);
+ return r;
+ }
+
+ /* Set the default UVD handles that the firmware can handle */
+ adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+
+ if (adev->asic_type < CHIP_VEGA20) {
+ unsigned int version_major, version_minor;
+
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
+ version_major, version_minor, family_id);
+
+ /*
+ * Limit the number of UVD handles depending on microcode major
+ * and minor versions. The firmware version which has 40 UVD
+ * instances support is 1.80. So all subsequent versions should
+ * also have the same support.
+ */
+ if ((version_major > 0x01) ||
+ ((version_major == 0x01) && (version_minor >= 0x50)))
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (family_id << 8));
+
+ if ((adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS11) &&
+ (adev->uvd.fw_version < FW_1_66_16))
+ DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n",
+ version_major, version_minor);
+ } else {
+ unsigned int enc_major, enc_minor, dec_minor;
+
+ dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
+ enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
+ DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
+ enc_major, enc_minor, dec_minor, family_id);
+
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
+ }
+
+ bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ &adev->uvd.inst[j].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ atomic_set(&adev->uvd.handles[i], 0);
+ adev->uvd.filp[i] = NULL;
+ }
+
+ /* from uvd v5.0 HW addressing capacity increased to 64 bits */
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+ adev->uvd.address_64_bit = true;
+
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
+ if (r)
+ return r;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
+ break;
+ case CHIP_CARRIZO:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
+ break;
+ case CHIP_FIJI:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
+ break;
+ case CHIP_STONEY:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
+ break;
+ default:
+ adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
+ }
+
+ return 0;
+}
+
+int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
+{
+ void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
+ int i, j;
+
+ drm_sched_entity_destroy(&adev->uvd.entity);
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ kvfree(adev->uvd.inst[j].saved_bo);
+
+ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ (void **)&adev->uvd.inst[j].cpu_addr);
+
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring);
+
+ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
+ amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
+ amdgpu_ucode_release(&adev->uvd.fw);
+
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ struct drm_gpu_scheduler *sched;
+ int r;
+
+ ring = &adev->uvd.inst[0].ring;
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
+
+ return 0;
+}
+
+int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, j, idx;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
+
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ if (adev->uvd.inst[j].vcpu_bo == NULL)
+ continue;
+
+ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+ ptr = adev->uvd.inst[j].cpu_addr;
+
+ adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->uvd.inst[j].saved_bo)
+ return -ENOMEM;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+ if (in_ras_intr)
+ memset(adev->uvd.inst[j].saved_bo, 0, size);
+ else
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+
+ drm_dev_exit(idx);
+ }
+ }
+
+ if (in_ras_intr)
+ DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+
+ return 0;
+}
+
+int amdgpu_uvd_resume(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, idx;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ if (adev->uvd.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+ ptr = adev->uvd.inst[i].cpu_addr;
+
+ if (adev->uvd.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->uvd.inst[i].saved_bo);
+ adev->uvd.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ drm_dev_exit(idx);
+ }
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ /* to restore uvd fence seq */
+ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
+ }
+ }
+ return 0;
+}
+
+void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
+ int i, r;
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+
+ if (handle != 0 && adev->uvd.filp[i] == filp) {
+ struct dma_fence *fence;
+
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
+ &fence);
+ if (r) {
+ DRM_ERROR("Error destroying UVD %d!\n", r);
+ continue;
+ }
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ adev->uvd.filp[i] = NULL;
+ atomic_set(&adev->uvd.handles[i], 0);
+ }
+ }
+}
+
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
+{
+ int i;
+
+ for (i = 0; i < abo->placement.num_placement; ++i) {
+ abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
+ abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ }
+}
+
+static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ uint32_t lo, hi;
+ uint64_t addr;
+
+ lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
+ hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
+ addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
+
+ return addr;
+}
+
+/**
+ * amdgpu_uvd_cs_pass1 - first parsing round
+ *
+ * @ctx: UVD parser context
+ *
+ * Make sure UVD message and feedback buffers are in VRAM and
+ * nobody is violating an 256MB boundary.
+ */
+static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ struct ttm_operation_ctx tctx = { false, false };
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint32_t cmd;
+ uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
+ int r = 0;
+
+ r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ if (!ctx->parser->adev->uvd.address_64_bit) {
+ /* check if it's a message or feedback command */
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
+ if (cmd == 0x0 || cmd == 0x3) {
+ /* yes, force it into VRAM */
+ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ amdgpu_bo_placement_from_domain(bo, domain);
+ }
+ amdgpu_uvd_force_into_uvd_segment(bo);
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_uvd_cs_msg_decode - handle UVD decode message
+ *
+ * @adev: amdgpu_device pointer
+ * @msg: pointer to message structure
+ * @buf_sizes: placeholder to put the different buffer lengths
+ *
+ * Peek into the decode message and calculate the necessary buffer sizes.
+ */
+static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
+ unsigned int buf_sizes[])
+{
+ unsigned int stream_type = msg[4];
+ unsigned int width = msg[6];
+ unsigned int height = msg[7];
+ unsigned int dpb_size = msg[9];
+ unsigned int pitch = msg[28];
+ unsigned int level = msg[57];
+
+ unsigned int width_in_mb = width / 16;
+ unsigned int height_in_mb = ALIGN(height / 16, 2);
+ unsigned int fs_in_mb = width_in_mb * height_in_mb;
+
+ unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
+ unsigned int min_ctx_size = ~0;
+
+ image_size = width * height;
+ image_size += image_size / 2;
+ image_size = ALIGN(image_size, 1024);
+
+ switch (stream_type) {
+ case 0: /* H264 */
+ switch (level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ if (num_dpb_buffer > 17)
+ num_dpb_buffer = 17;
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * num_dpb_buffer;
+
+ /* macroblock context buffer */
+ min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * height_in_mb * 32;
+ break;
+
+ case 1: /* VC1 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+
+ /* CONTEXT_BUFFER */
+ min_dpb_size += width_in_mb * height_in_mb * 128;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * 64;
+
+ /* DB surface buffer */
+ min_dpb_size += width_in_mb * 128;
+
+ /* BP */
+ tmp = max(width_in_mb, height_in_mb);
+ min_dpb_size += ALIGN(tmp * 7 * 16, 64);
+ break;
+
+ case 3: /* MPEG2 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+ break;
+
+ case 4: /* MPEG4 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+
+ /* CM */
+ min_dpb_size += width_in_mb * height_in_mb * 64;
+
+ /* IT surface buffer */
+ min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
+ break;
+
+ case 7: /* H264 Perf */
+ switch (level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ if (num_dpb_buffer > 17)
+ num_dpb_buffer = 17;
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * num_dpb_buffer;
+
+ if (!adev->uvd.use_ctx_buf) {
+ /* macroblock context buffer */
+ min_dpb_size +=
+ width_in_mb * height_in_mb * num_dpb_buffer * 192;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * height_in_mb * 32;
+ } else {
+ /* macroblock context buffer */
+ min_ctx_size =
+ width_in_mb * height_in_mb * num_dpb_buffer * 192;
+ }
+ break;
+
+ case 8: /* MJPEG */
+ min_dpb_size = 0;
+ break;
+
+ case 16: /* H265 */
+ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
+ image_size = ALIGN(image_size, 256);
+
+ num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
+ min_dpb_size = image_size * num_dpb_buffer;
+ min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
+ * 16 * num_dpb_buffer + 52 * 1024;
+ break;
+
+ default:
+ DRM_ERROR("UVD codec not handled %d!\n", stream_type);
+ return -EINVAL;
+ }
+
+ if (width > pitch) {
+ DRM_ERROR("Invalid UVD decoding target pitch!\n");
+ return -EINVAL;
+ }
+
+ if (dpb_size < min_dpb_size) {
+ DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
+ dpb_size, min_dpb_size);
+ return -EINVAL;
+ }
+
+ buf_sizes[0x1] = dpb_size;
+ buf_sizes[0x2] = image_size;
+ buf_sizes[0x4] = min_ctx_size;
+ /* store image width to adjust nb memory pstate */
+ adev->uvd.decode_image_width = width;
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_cs_msg - handle UVD message
+ *
+ * @ctx: UVD parser context
+ * @bo: buffer object containing the message
+ * @offset: offset into the buffer object
+ *
+ * Peek into the UVD message and extract the session id.
+ * Make sure that we don't open up to many sessions.
+ */
+static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+ struct amdgpu_bo *bo, unsigned int offset)
+{
+ struct amdgpu_device *adev = ctx->parser->adev;
+ int32_t *msg, msg_type, handle;
+ void *ptr;
+ long r;
+ int i;
+
+ if (offset & 0x3F) {
+ DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the UVD) message (%ld)!\n", r);
+ return r;
+ }
+
+ msg = ptr + offset;
+
+ msg_type = msg[1];
+ handle = msg[2];
+
+ if (handle == 0) {
+ amdgpu_bo_kunmap(bo);
+ DRM_ERROR("Invalid UVD handle!\n");
+ return -EINVAL;
+ }
+
+ switch (msg_type) {
+ case 0:
+ /* it's a create msg, calc image size (width * height) */
+ amdgpu_bo_kunmap(bo);
+
+ /* try to alloc a new handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ DRM_ERROR(")Handle 0x%x already in use!\n",
+ handle);
+ return -EINVAL;
+ }
+
+ if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
+ adev->uvd.filp[i] = ctx->parser->filp;
+ return 0;
+ }
+ }
+
+ DRM_ERROR("No more free UVD handles!\n");
+ return -ENOSPC;
+
+ case 1:
+ /* it's a decode msg, calc buffer sizes */
+ r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
+ amdgpu_bo_kunmap(bo);
+ if (r)
+ return r;
+
+ /* validate the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ if (adev->uvd.filp[i] != ctx->parser->filp) {
+ DRM_ERROR("UVD handle collision detected!\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+ }
+
+ DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+ return -ENOENT;
+
+ case 2:
+ /* it's a destroy msg, free the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+ amdgpu_bo_kunmap(bo);
+ return 0;
+
+ default:
+ DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+ }
+
+ amdgpu_bo_kunmap(bo);
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_uvd_cs_pass2 - second parsing round
+ *
+ * @ctx: UVD parser context
+ *
+ * Patch buffer addresses, make sure buffer sizes are correct.
+ */
+static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint32_t cmd;
+ uint64_t start, end;
+ uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
+ int r;
+
+ r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ start = amdgpu_bo_gpu_offset(bo);
+
+ end = (mapping->last + 1 - mapping->start);
+ end = end * AMDGPU_GPU_PAGE_SIZE + start;
+
+ addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ start += addr;
+
+ amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
+
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
+ if (cmd < 0x4) {
+ if ((end - start) < ctx->buf_sizes[cmd]) {
+ DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
+ (unsigned int)(end - start),
+ ctx->buf_sizes[cmd]);
+ return -EINVAL;
+ }
+
+ } else if (cmd == 0x206) {
+ if ((end - start) < ctx->buf_sizes[4]) {
+ DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
+ (unsigned int)(end - start),
+ ctx->buf_sizes[4]);
+ return -EINVAL;
+ }
+ } else if ((cmd != 0x100) && (cmd != 0x204)) {
+ DRM_ERROR("invalid UVD command %X!\n", cmd);
+ return -EINVAL;
+ }
+
+ if (!ctx->parser->adev->uvd.address_64_bit) {
+ if ((start >> 28) != ((end - 1) >> 28)) {
+ DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
+ start, end);
+ return -EINVAL;
+ }
+
+ if ((cmd == 0 || cmd == 0x3) &&
+ (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
+ DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
+ start, end);
+ return -EINVAL;
+ }
+ }
+
+ if (cmd == 0) {
+ ctx->has_msg_cmd = true;
+ r = amdgpu_uvd_cs_msg(ctx, bo, addr);
+ if (r)
+ return r;
+ } else if (!ctx->has_msg_cmd) {
+ DRM_ERROR("Message needed before other commands are send!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_cs_reg - parse register writes
+ *
+ * @ctx: UVD parser context
+ * @cb: callback function
+ *
+ * Parse the register writes, call cb on each complete command.
+ */
+static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
+ int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
+{
+ int i, r;
+
+ ctx->idx++;
+ for (i = 0; i <= ctx->count; ++i) {
+ unsigned int reg = ctx->reg + i;
+
+ if (ctx->idx >= ctx->ib->length_dw) {
+ DRM_ERROR("Register command after end of CS!\n");
+ return -EINVAL;
+ }
+
+ switch (reg) {
+ case mmUVD_GPCOM_VCPU_DATA0:
+ ctx->data0 = ctx->idx;
+ break;
+ case mmUVD_GPCOM_VCPU_DATA1:
+ ctx->data1 = ctx->idx;
+ break;
+ case mmUVD_GPCOM_VCPU_CMD:
+ r = cb(ctx);
+ if (r)
+ return r;
+ break;
+ case mmUVD_ENGINE_CNTL:
+ case mmUVD_NO_OP:
+ break;
+ default:
+ DRM_ERROR("Invalid reg 0x%X!\n", reg);
+ return -EINVAL;
+ }
+ ctx->idx++;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_cs_packets - parse UVD packets
+ *
+ * @ctx: UVD parser context
+ * @cb: callback function
+ *
+ * Parse the command stream packets.
+ */
+static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
+ int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
+{
+ int r;
+
+ for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
+ uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
+ unsigned int type = CP_PACKET_GET_TYPE(cmd);
+
+ switch (type) {
+ case PACKET_TYPE0:
+ ctx->reg = CP_PACKET0_GET_REG(cmd);
+ ctx->count = CP_PACKET_GET_COUNT(cmd);
+ r = amdgpu_uvd_cs_reg(ctx, cb);
+ if (r)
+ return r;
+ break;
+ case PACKET_TYPE2:
+ ++ctx->idx;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_ring_parse_cs - UVD command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ *
+ * Parse the command stream, patch in addresses as necessary.
+ */
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_uvd_cs_ctx ctx = {};
+ unsigned int buf_sizes[] = {
+ [0x00000000] = 2048,
+ [0x00000001] = 0xFFFFFFFF,
+ [0x00000002] = 0xFFFFFFFF,
+ [0x00000003] = 2048,
+ [0x00000004] = 0xFFFFFFFF,
+ };
+ int r;
+
+ job->vm = NULL;
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+
+ if (ib->length_dw % 16) {
+ DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
+ ib->length_dw);
+ return -EINVAL;
+ }
+
+ ctx.parser = parser;
+ ctx.buf_sizes = buf_sizes;
+ ctx.ib = ib;
+
+ /* first round only required on chips without UVD 64 bit address support */
+ if (!parser->adev->uvd.address_64_bit) {
+ /* first round, make sure the buffers are actually in the UVD segment */
+ r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
+ if (r)
+ return r;
+ }
+
+ /* second round, patch buffer addresses into the command stream */
+ r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
+ if (r)
+ return r;
+
+ if (!ctx.has_msg_cmd) {
+ DRM_ERROR("UVD-IBs need a msg command!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+ bool direct, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ uint32_t offset, data[4];
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 64, direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ if (adev->asic_type >= CHIP_VEGA10)
+ offset = adev->reg_offset[UVD_HWIP][ring->me][1];
+ else
+ offset = UVD_BASE_SI;
+
+ data[0] = PACKET0(offset + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset + UVD_NO_OP, 0);
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+ ib->ptr[0] = data[0];
+ ib->ptr[1] = addr;
+ ib->ptr[2] = data[1];
+ ib->ptr[3] = addr >> 32;
+ ib->ptr[4] = data[2];
+ ib->ptr[5] = 0;
+ for (i = 6; i < 16; i += 2) {
+ ib->ptr[i] = data[3];
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ if (direct) {
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+ } else {
+ r = drm_sched_job_add_resv_dependencies(&job->base,
+ bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL);
+ if (r)
+ goto err_free;
+
+ f = amdgpu_job_submit(job);
+ }
+
+ amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_fence(bo, f, false);
+ amdgpu_bo_unreserve(bo);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/* multiple fence commands without any stream commands in between can
+ * crash the vcpu so just try to emmit a dummy create/destroy msg to
+ * avoid this
+ */
+int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_bo *bo = adev->uvd.ib_bo;
+ uint32_t *msg;
+ int i;
+
+ msg = amdgpu_bo_kptr(bo);
+ /* stitch together an UVD create msg */
+ msg[0] = cpu_to_le32(0x00000de4);
+ msg[1] = cpu_to_le32(0x00000000);
+ msg[2] = cpu_to_le32(handle);
+ msg[3] = cpu_to_le32(0x00000000);
+ msg[4] = cpu_to_le32(0x00000000);
+ msg[5] = cpu_to_le32(0x00000000);
+ msg[6] = cpu_to_le32(0x00000000);
+ msg[7] = cpu_to_le32(0x00000780);
+ msg[8] = cpu_to_le32(0x00000440);
+ msg[9] = cpu_to_le32(0x00000000);
+ msg[10] = cpu_to_le32(0x01b37000);
+ for (i = 11; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return amdgpu_uvd_send_msg(ring, bo, true, fence);
+
+}
+
+int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_bo *bo = NULL;
+ uint32_t *msg;
+ int r, i;
+
+ if (direct) {
+ bo = adev->uvd.ib_bo;
+ } else {
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
+ if (r)
+ return r;
+ }
+
+ msg = amdgpu_bo_kptr(bo);
+ /* stitch together an UVD destroy msg */
+ msg[0] = cpu_to_le32(0x00000de4);
+ msg[1] = cpu_to_le32(0x00000002);
+ msg[2] = cpu_to_le32(handle);
+ msg[3] = cpu_to_le32(0x00000000);
+ for (i = 4; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
+
+ if (!direct)
+ amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+
+ return r;
+}
+
+static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, uvd.idle_work.work);
+ unsigned int fences = 0, i, j;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+ }
+
+ if (fences == 0) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+ } else {
+ schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+ if (set_clocks) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, true);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_UNGATE);
+ }
+ }
+}
+
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_uvd_ring_test_ib - test ib execution
+ *
+ * @ring: amdgpu_ring pointer
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test if we can successfully execute an IB
+ */
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence;
+ long r;
+
+ r = amdgpu_uvd_get_create_msg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ dma_fence_put(fence);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ if (r < 0)
+ goto error;
+
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+
+error:
+ return r;
+}
+
+/**
+ * amdgpu_uvd_used_handles - returns used UVD handles
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the number of UVD handles in use
+ */
+uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
+{
+ unsigned int i;
+ uint32_t used_handles = 0;
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ /*
+ * Handles can be freed in any order, and not
+ * necessarily linear. So we need to count
+ * all non-zero handles.
+ */
+ if (atomic_read(&adev->uvd.handles[i]))
+ used_handles++;
+ }
+
+ return used_handles;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
new file mode 100644
index 000000000..9f89bb7cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UVD_H__
+#define __AMDGPU_UVD_H__
+
+#define AMDGPU_DEFAULT_UVD_HANDLES 10
+#define AMDGPU_MAX_UVD_HANDLES 40
+#define AMDGPU_UVD_STACK_SIZE (200*1024)
+#define AMDGPU_UVD_HEAP_SIZE (256*1024)
+#define AMDGPU_UVD_SESSION_SIZE (50*1024)
+#define AMDGPU_UVD_FIRMWARE_OFFSET 256
+
+#define AMDGPU_MAX_UVD_INSTANCES 2
+
+#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
+ (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
+ 8) - AMDGPU_UVD_FIRMWARE_OFFSET)
+
+struct amdgpu_uvd_inst {
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ void *saved_bo;
+ struct amdgpu_ring ring;
+ struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
+ struct amdgpu_irq_src irq;
+ uint32_t srbm_soft_reset;
+};
+
+#define AMDGPU_UVD_HARVEST_UVD0 (1 << 0)
+#define AMDGPU_UVD_HARVEST_UVD1 (1 << 1)
+
+struct amdgpu_uvd {
+ const struct firmware *fw; /* UVD firmware */
+ unsigned fw_version;
+ unsigned max_handles;
+ unsigned num_enc_rings;
+ uint8_t num_uvd_inst;
+ bool address_64_bit;
+ bool use_ctx_buf;
+ struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
+ struct drm_sched_entity entity;
+ struct delayed_work idle_work;
+ unsigned harvest_config;
+ /* store image width to adjust nb memory state */
+ unsigned decode_image_width;
+ uint32_t keyselect;
+ struct amdgpu_bo *ib_bo;
+};
+
+int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
+int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
+int amdgpu_uvd_suspend(struct amdgpu_device *adev);
+int amdgpu_uvd_resume(struct amdgpu_device *adev);
+int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence);
+int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
+void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
+ struct drm_file *filp);
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
new file mode 100644
index 000000000..1904edf68
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -0,0 +1,1185 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_cs.h"
+#include "cikd.h"
+
+/* 1 second timeout */
+#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
+#endif
+#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
+#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
+#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
+#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
+#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
+
+#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
+
+#ifdef CONFIG_DRM_AMDGPU_CIK
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+MODULE_FIRMWARE(FIRMWARE_KABINI);
+MODULE_FIRMWARE(FIRMWARE_KAVERI);
+MODULE_FIRMWARE(FIRMWARE_HAWAII);
+MODULE_FIRMWARE(FIRMWARE_MULLINS);
+#endif
+MODULE_FIRMWARE(FIRMWARE_TONGA);
+MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+MODULE_FIRMWARE(FIRMWARE_FIJI);
+MODULE_FIRMWARE(FIRMWARE_STONEY);
+MODULE_FIRMWARE(FIRMWARE_POLARIS10);
+MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
+
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
+
+static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence);
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
+
+/**
+ * amdgpu_vce_sw_init - allocate memory, load vce firmware
+ *
+ * @adev: amdgpu_device pointer
+ * @size: size for the new BO
+ *
+ * First step to get VCE online, allocate memory and load the firmware
+ */
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+{
+ const char *fw_name;
+ const struct common_firmware_header *hdr;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
+ int i, r;
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ fw_name = FIRMWARE_BONAIRE;
+ break;
+ case CHIP_KAVERI:
+ fw_name = FIRMWARE_KAVERI;
+ break;
+ case CHIP_KABINI:
+ fw_name = FIRMWARE_KABINI;
+ break;
+ case CHIP_HAWAII:
+ fw_name = FIRMWARE_HAWAII;
+ break;
+ case CHIP_MULLINS:
+ fw_name = FIRMWARE_MULLINS;
+ break;
+#endif
+ case CHIP_TONGA:
+ fw_name = FIRMWARE_TONGA;
+ break;
+ case CHIP_CARRIZO:
+ fw_name = FIRMWARE_CARRIZO;
+ break;
+ case CHIP_FIJI:
+ fw_name = FIRMWARE_FIJI;
+ break;
+ case CHIP_STONEY:
+ fw_name = FIRMWARE_STONEY;
+ break;
+ case CHIP_POLARIS10:
+ fw_name = FIRMWARE_POLARIS10;
+ break;
+ case CHIP_POLARIS11:
+ fw_name = FIRMWARE_POLARIS11;
+ break;
+ case CHIP_POLARIS12:
+ fw_name = FIRMWARE_POLARIS12;
+ break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
+ case CHIP_VEGA10:
+ fw_name = FIRMWARE_VEGA10;
+ break;
+ case CHIP_VEGA12:
+ fw_name = FIRMWARE_VEGA12;
+ break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->vce.fw);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+
+ ucode_version = le32_to_cpu(hdr->ucode_version);
+ version_major = (ucode_version >> 20) & 0xfff;
+ version_minor = (ucode_version >> 8) & 0xfff;
+ binary_id = ucode_version & 0xff;
+ DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
+ version_major, version_minor, binary_id);
+ adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (binary_id << 8));
+
+ r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vce.vcpu_bo,
+ &adev->vce.gpu_addr, &adev->vce.cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
+ return r;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ atomic_set(&adev->vce.handles[i], 0);
+ adev->vce.filp[i] = NULL;
+ }
+
+ INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
+ mutex_init(&adev->vce.idle_mutex);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_sw_fini - free memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Last step on VCE teardown, free firmware memory
+ */
+int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
+{
+ unsigned int i;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return 0;
+
+ drm_sched_entity_destroy(&adev->vce.entity);
+
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
+ for (i = 0; i < adev->vce.num_rings; i++)
+ amdgpu_ring_fini(&adev->vce.ring[i]);
+
+ amdgpu_ucode_release(&adev->vce.fw);
+ mutex_destroy(&adev->vce.idle_mutex);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ struct drm_gpu_scheduler *sched;
+ int r;
+
+ ring = &adev->vce.ring[0];
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_suspend - unpin VCE fw memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_suspend(struct amdgpu_device *adev)
+{
+ int i;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->vce.vcpu_bo == NULL)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (atomic_read(&adev->vce.handles[i]))
+ break;
+
+ if (i == AMDGPU_MAX_VCE_HANDLES)
+ return 0;
+
+ /* TODO: suspending running encoding sessions isn't supported */
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_vce_resume - pin VCE fw memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_resume(struct amdgpu_device *adev)
+{
+ void *cpu_addr;
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+ int r, idx;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return -EINVAL;
+
+ r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
+ if (r) {
+ amdgpu_bo_unreserve(adev->vce.vcpu_bo);
+ dev_err(adev->dev, "(%d) VCE map failed\n", r);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
+ adev->vce.fw->size - offset);
+ drm_dev_exit(idx);
+ }
+
+ amdgpu_bo_kunmap(adev->vce.vcpu_bo);
+
+ amdgpu_bo_unreserve(adev->vce.vcpu_bo);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_idle_work_handler - power off VCE
+ *
+ * @work: pointer to work structure
+ *
+ * power of VCE when it's not used any more
+ */
+static void amdgpu_vce_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vce.idle_work.work);
+ unsigned int i, count = 0;
+
+ for (i = 0; i < adev->vce.num_rings; i++)
+ count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
+
+ if (count == 0) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+ } else {
+ schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+ }
+}
+
+/**
+ * amdgpu_vce_ring_begin_use - power up VCE
+ *
+ * @ring: amdgpu ring
+ *
+ * Make sure VCE is powerd up when we want to use it
+ */
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ mutex_lock(&adev->vce.idle_mutex);
+ set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
+ if (set_clocks) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, true);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_UNGATE);
+
+ }
+ }
+ mutex_unlock(&adev->vce.idle_mutex);
+}
+
+/**
+ * amdgpu_vce_ring_end_use - power VCE down
+ *
+ * @ring: amdgpu ring
+ *
+ * Schedule work to power VCE down again
+ */
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_vce_free_handles - free still open VCE handles
+ *
+ * @adev: amdgpu_device pointer
+ * @filp: drm file pointer
+ *
+ * Close all VCE handles still open by this file pointer
+ */
+void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
+{
+ struct amdgpu_ring *ring = &adev->vce.ring[0];
+ int i, r;
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ uint32_t handle = atomic_read(&adev->vce.handles[i]);
+
+ if (!handle || adev->vce.filp[i] != filp)
+ continue;
+
+ r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
+ if (r)
+ DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
+
+ adev->vce.filp[i] = NULL;
+ atomic_set(&adev->vce.handles[i], 0);
+ }
+}
+
+/**
+ * amdgpu_vce_get_create_msg - generate a VCE create msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @fence: optional fence to return
+ *
+ * Open up a stream for HW test
+ */
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ const unsigned int ib_size_dw = 1024;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct amdgpu_ib ib_msg;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job);
+ if (r)
+ return r;
+
+ memset(&ib_msg, 0, sizeof(ib_msg));
+ /* only one gpu page is needed, alloc +1 page to make addr aligned. */
+ r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib_msg);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
+ /* let addr point to page boundary */
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
+
+ /* stitch together an VCE create msg */
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
+ ib->ptr[ib->length_dw++] = handle;
+
+ if ((ring->adev->vce.fw_version >> 24) >= 52)
+ ib->ptr[ib->length_dw++] = 0x00000040; /* len */
+ else
+ ib->ptr[ib->length_dw++] = 0x00000030; /* len */
+ ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000042;
+ ib->ptr[ib->length_dw++] = 0x0000000a;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = 0x00000080;
+ ib->ptr[ib->length_dw++] = 0x00000060;
+ ib->ptr[ib->length_dw++] = 0x00000100;
+ ib->ptr[ib->length_dw++] = 0x00000100;
+ ib->ptr[ib->length_dw++] = 0x0000000c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ if ((ring->adev->vce.fw_version >> 24) >= 52) {
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ }
+
+ ib->ptr[ib->length_dw++] = 0x00000014; /* len */
+ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ amdgpu_ib_free(ring->adev, &ib_msg, f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @direct: direct or delayed pool
+ * @fence: optional fence to return
+ *
+ * Close up a stream for HW test or if userspace failed to do so
+ */
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
+{
+ const unsigned int ib_size_dw = 1024;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4,
+ direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ /* stitch together an VCE destroy msg */
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
+ ib->ptr[ib->length_dw++] = handle;
+
+ ib->ptr[ib->length_dw++] = 0x00000020; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008; /* len */
+ ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ f = amdgpu_job_submit(job);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
+ *
+ * @p: cs parser
+ * @ib: indirect buffer to use
+ * @lo: address of lower dword
+ * @hi: address of higher dword
+ * @size: minimum size
+ * @index: bs/fb index
+ *
+ * Make sure that no BO cross a 4GB boundary.
+ */
+static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
+ struct amdgpu_ib *ib, int lo, int hi,
+ unsigned int size, int32_t index)
+{
+ int64_t offset = ((uint64_t)size) * ((int64_t)index);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *mapping;
+ unsigned int i, fpfn, lpfn;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
+ int r;
+
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
+ if (index >= 0) {
+ addr += offset;
+ fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
+ lpfn = 0x100000000ULL >> PAGE_SHIFT;
+ } else {
+ fpfn = 0;
+ lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
+ }
+
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
+ addr, lo, hi, size, index);
+ return r;
+ }
+
+ for (i = 0; i < bo->placement.num_placement; ++i) {
+ bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
+ bo->placements[i].lpfn = bo->placements[i].lpfn ?
+ min(bo->placements[i].lpfn, lpfn) : lpfn;
+ }
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+
+/**
+ * amdgpu_vce_cs_reloc - command submission relocation
+ *
+ * @p: parser context
+ * @ib: indirect buffer to use
+ * @lo: address of lower dword
+ * @hi: address of higher dword
+ * @size: minimum size
+ * @index: bs/fb index
+ *
+ * Patch relocation inside command stream with real buffer address
+ */
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
+ int lo, int hi, unsigned int size, uint32_t index)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
+ int r;
+
+ if (index == 0xffffffff)
+ index = 0;
+
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
+ addr += ((uint64_t)size) * ((uint64_t)index);
+
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
+ addr, lo, hi, size, index);
+ return r;
+ }
+
+ if ((addr + (uint64_t)size) >
+ (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
+ addr, lo, hi);
+ return -EINVAL;
+ }
+
+ addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ addr += amdgpu_bo_gpu_offset(bo);
+ addr -= ((uint64_t)size) * ((uint64_t)index);
+
+ amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
+ amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_validate_handle - validate stream handle
+ *
+ * @p: parser context
+ * @handle: handle to validate
+ * @allocated: allocated a new handle?
+ *
+ * Validates the handle and return the found session index or -EINVAL
+ * we don't have another free session index.
+ */
+static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
+ uint32_t handle, uint32_t *allocated)
+{
+ unsigned int i;
+
+ /* validate the handle */
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ if (atomic_read(&p->adev->vce.handles[i]) == handle) {
+ if (p->adev->vce.filp[i] != p->filp) {
+ DRM_ERROR("VCE handle collision detected!\n");
+ return -EINVAL;
+ }
+ return i;
+ }
+ }
+
+ /* handle not found try to alloc a new one */
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
+ p->adev->vce.filp[i] = p->filp;
+ p->adev->vce.img_size[i] = 0;
+ *allocated |= 1 << i;
+ return i;
+ }
+ }
+
+ DRM_ERROR("No more free VCE handles!\n");
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_vce_ring_parse_cs - parse and validate the command stream
+ *
+ * @p: parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ */
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ unsigned int fb_idx = 0, bs_idx = 0;
+ int session_idx = -1;
+ uint32_t destroyed = 0;
+ uint32_t created = 0;
+ uint32_t allocated = 0;
+ uint32_t tmp, handle = 0;
+ uint32_t *size = &tmp;
+ unsigned int idx;
+ int i, r = 0;
+
+ job->vm = NULL;
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+
+ for (idx = 0; idx < ib->length_dw;) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ if ((len < 8) || (len & 3)) {
+ DRM_ERROR("invalid VCE command length (%d)!\n", len);
+ r = -EINVAL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case 0x00000002: /* task info */
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
+ break;
+
+ case 0x03000001: /* encode */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
+ 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000001: /* context buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000004: /* video bitstream buffer */
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ tmp, bs_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000005: /* feedback buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 4096, fb_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+ }
+
+ idx += len / 4;
+ }
+
+ for (idx = 0; idx < ib->length_dw;) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ switch (cmd) {
+ case 0x00000001: /* session */
+ handle = amdgpu_ib_get_value(ib, idx + 2);
+ session_idx = amdgpu_vce_validate_handle(p, handle,
+ &allocated);
+ if (session_idx < 0) {
+ r = session_idx;
+ goto out;
+ }
+ size = &p->adev->vce.img_size[session_idx];
+ break;
+
+ case 0x00000002: /* task info */
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
+ break;
+
+ case 0x01000001: /* create */
+ created |= 1 << session_idx;
+ if (destroyed & (1 << session_idx)) {
+ destroyed &= ~(1 << session_idx);
+ allocated |= 1 << session_idx;
+
+ } else if (!(allocated & (1 << session_idx))) {
+ DRM_ERROR("Handle already in use!\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ *size = amdgpu_ib_get_value(ib, idx + 8) *
+ amdgpu_ib_get_value(ib, idx + 10) *
+ 8 * 3 / 2;
+ break;
+
+ case 0x04000001: /* config extension */
+ case 0x04000002: /* pic control */
+ case 0x04000005: /* rate control */
+ case 0x04000007: /* motion estimation */
+ case 0x04000008: /* rdo */
+ case 0x04000009: /* vui */
+ case 0x05000002: /* auxiliary buffer */
+ case 0x05000009: /* clock table */
+ break;
+
+ case 0x0500000c: /* hw config */
+ switch (p->adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+ break;
+
+ case 0x03000001: /* encode */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
+ *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
+ *size / 3, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x02000001: /* destroy */
+ destroyed |= 1 << session_idx;
+ break;
+
+ case 0x05000001: /* context buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ *size * 2, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000004: /* video bitstream buffer */
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ tmp, bs_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000005: /* feedback buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ 4096, fb_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
+ idx + 2, *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
+ idx + 7, *size / 12, 0);
+ if (r)
+ goto out;
+ break;
+
+ default:
+ DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (session_idx == -1) {
+ DRM_ERROR("no session command at start of IB\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ idx += len / 4;
+ }
+
+ if (allocated & ~created) {
+ DRM_ERROR("New session without create command!\n");
+ r = -ENOENT;
+ }
+
+out:
+ if (!r) {
+ /* No error, free all destroyed handle slots */
+ tmp = destroyed;
+ } else {
+ /* Error during parsing, free all allocated handle slots */
+ tmp = allocated;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (tmp & (1 << i))
+ atomic_set(&p->adev->vce.handles[i], 0);
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
+ *
+ * @p: parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ */
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ int session_idx = -1;
+ uint32_t destroyed = 0;
+ uint32_t created = 0;
+ uint32_t allocated = 0;
+ uint32_t tmp, handle = 0;
+ int i, r = 0, idx = 0;
+
+ while (idx < ib->length_dw) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ if ((len < 8) || (len & 3)) {
+ DRM_ERROR("invalid VCE command length (%d)!\n", len);
+ r = -EINVAL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case 0x00000001: /* session */
+ handle = amdgpu_ib_get_value(ib, idx + 2);
+ session_idx = amdgpu_vce_validate_handle(p, handle,
+ &allocated);
+ if (session_idx < 0) {
+ r = session_idx;
+ goto out;
+ }
+ break;
+
+ case 0x01000001: /* create */
+ created |= 1 << session_idx;
+ if (destroyed & (1 << session_idx)) {
+ destroyed &= ~(1 << session_idx);
+ allocated |= 1 << session_idx;
+
+ } else if (!(allocated & (1 << session_idx))) {
+ DRM_ERROR("Handle already in use!\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ break;
+
+ case 0x02000001: /* destroy */
+ destroyed |= 1 << session_idx;
+ break;
+
+ default:
+ break;
+ }
+
+ if (session_idx == -1) {
+ DRM_ERROR("no session command at start of IB\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ idx += len / 4;
+ }
+
+ if (allocated & ~created) {
+ DRM_ERROR("New session without create command!\n");
+ r = -ENOENT;
+ }
+
+out:
+ if (!r) {
+ /* No error, free all destroyed handle slots */
+ tmp = destroyed;
+ amdgpu_ib_free(p->adev, ib, NULL);
+ } else {
+ /* Error during parsing, free all allocated handle slots */
+ tmp = allocated;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (tmp & (1 << i))
+ atomic_set(&p->adev->vce.handles[i], 0);
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: engine to use
+ * @job: job to retrieve vmid from
+ * @ib: the IB to execute
+ * @flags: unused
+ *
+ */
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, VCE_CMD_IB);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * amdgpu_vce_ring_emit_fence - add a fence command to the ring
+ *
+ * @ring: engine to use
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ */
+void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCE_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCE_CMD_TRAP);
+ amdgpu_ring_write(ring, VCE_CMD_END);
+}
+
+/**
+ * amdgpu_vce_ring_test_ring - test if VCE ring is working
+ *
+ * @ring: the engine to test on
+ *
+ */
+int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r, timeout = adev->usec_timeout;
+
+ /* skip ring test for sriov*/
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCE_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_test_ib - test if VCE IBs are working
+ *
+ * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ */
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ long r;
+
+ /* skip vce ring1/2 ib test for now, since it's not reliable */
+ if (ring != &ring->adev->vce.ring[0])
+ return 0;
+
+ r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+ if (r)
+ goto error;
+
+ r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ dma_fence_put(fence);
+ return r;
+}
+
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
new file mode 100644
index 000000000..ea680fc9a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCE_H__
+#define __AMDGPU_VCE_H__
+
+#define AMDGPU_MAX_VCE_HANDLES 16
+#define AMDGPU_VCE_FIRMWARE_OFFSET 256
+
+#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
+#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
+
+#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16))
+
+struct amdgpu_vce {
+ struct amdgpu_bo *vcpu_bo;
+ uint64_t gpu_addr;
+ void *cpu_addr;
+ void *saved_bo;
+ unsigned fw_version;
+ unsigned fb_version;
+ atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
+ struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
+ uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
+ struct delayed_work idle_work;
+ struct mutex idle_mutex;
+ const struct firmware *fw; /* VCE firmware */
+ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
+ struct amdgpu_irq_src irq;
+ unsigned harvest_config;
+ struct drm_sched_entity entity;
+ uint32_t srbm_soft_reset;
+ unsigned num_rings;
+};
+
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
+int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev);
+int amdgpu_vce_suspend(struct amdgpu_device *adev);
+int amdgpu_vce_resume(struct amdgpu_device *adev);
+void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
+unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring);
+unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring);
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
new file mode 100644
index 000000000..03b4bcfca
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -0,0 +1,1263 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+
+/* Firmware Names */
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
+#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
+MODULE_FIRMWARE(FIRMWARE_PICASSO);
+MODULE_FIRMWARE(FIRMWARE_RAVEN2);
+MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
+MODULE_FIRMWARE(FIRMWARE_RENOIR);
+MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
+MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
+MODULE_FIRMWARE(FIRMWARE_NAVI10);
+MODULE_FIRMWARE(FIRMWARE_NAVI14);
+MODULE_FIRMWARE(FIRMWARE_NAVI12);
+MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
+MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
+MODULE_FIRMWARE(FIRMWARE_VANGOGH);
+MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
+MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
+MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
+MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+
+int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ char fw_name[40];
+ int r;
+
+ amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.fw);
+
+ return r;
+}
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+{
+ unsigned long bo_size;
+ const struct common_firmware_header *hdr;
+ unsigned char fw_check;
+ unsigned int fw_shared_size, log_offset;
+ int i, r;
+
+ INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+ mutex_init(&adev->vcn.vcn_pg_lock);
+ mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
+ atomic_set(&adev->vcn.total_submission_cnt, 0);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+
+ /*
+ * Some Steam Deck's BIOS versions are incompatible with the
+ * indirect SRAM mode, leading to amdgpu being unable to get
+ * properly probed (and even potentially crashing the kernel).
+ * Hence, check for these versions here - notice this is
+ * restricted to Vangogh (Deck's APU).
+ */
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) {
+ const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+ if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.indirect_sram = false;
+ dev_info(adev->dev,
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ }
+ }
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
+
+ /* Bit 20-23, it is encode major and non-zero for new naming convention.
+ * This field is part of version minor and DRM_DISABLED_FLAG in old naming
+ * convention. Since the l:wq!atest version minor is 0x5B and DRM_DISABLED_FLAG
+ * is zero in old naming convention, this field is always zero so far.
+ * These four bits are used to tell which naming convention is present.
+ */
+ fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
+ if (fw_check) {
+ unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
+
+ fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
+ enc_major = fw_check;
+ dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
+ vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
+ DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ enc_major, enc_minor, dec_ver, vep, fw_rev);
+ } else {
+ unsigned int version_major, version_minor, family_id;
+
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
+ version_major, version_minor, family_id);
+ }
+
+ bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
+ } else {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+ log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
+ }
+
+ bo_size += fw_shared_size;
+
+ if (amdgpu_vcnfw_log)
+ bo_size += AMDGPU_VCNFW_LOG_SIZE;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
+
+ if (adev->vcn.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[j].dpg_sram_bo,
+ &adev->vcn.inst[j].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+
+ kvfree(adev->vcn.inst[j].saved_bo);
+
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
+ &adev->vcn.inst[j].gpu_addr,
+ (void **)&adev->vcn.inst[j].cpu_addr);
+
+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ }
+
+ amdgpu_ucode_release(&adev->vcn.fw);
+ mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_destroy(&adev->vcn.vcn_pg_lock);
+
+ return 0;
+}
+
+/* from vcn4 and above, only unified queue is used */
+static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool ret = false;
+
+ if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
+ ret = true;
+
+ return ret;
+}
+
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
+{
+ bool ret = false;
+ int vcn_config = adev->vcn.vcn_config[vcn_instance];
+
+ if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
+ ret = true;
+
+ return ret;
+}
+
+int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, idx;
+
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ /* err_event_athub will corrupt VCPU buffer, so we need to
+ * restore fw data and clear buffer in amdgpu_vcn_resume() */
+ if (in_ras_intr)
+ return 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
+ }
+ return 0;
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, idx;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ drm_dev_exit(idx);
+ }
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ }
+ }
+ return 0;
+}
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
+ int r = 0;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (fence[j] ||
+ unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, j, &new_state);
+ }
+
+ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
+ fences += fence[j];
+ }
+
+ if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ } else {
+ schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ atomic_inc(&adev->vcn.total_submission_cnt);
+
+ if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
+ }
+
+ mutex_lock(&adev->vcn.vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+
+ if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
+
+ adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ }
+ mutex_unlock(&adev->vcn.vcn_pg_lock);
+}
+
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
+{
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&ring->adev->vcn.total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+}
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned int i;
+ int r;
+
+ /* VCN in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ 64, AMDGPU_IB_POOL_DIRECT,
+ &job);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
+ ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[1] = addr;
+ ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[3] = addr >> 32;
+ ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[5] = 0;
+ for (i = 6; i < 16; i += 2) {
+ ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+
+ amdgpu_ib_free(adev, ib_msg, f);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+err:
+ amdgpu_ib_free(adev, ib_msg, f);
+ return r;
+}
+
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *msg;
+ int r, i;
+
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
+ if (r)
+ return r;
+
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+ msg[0] = cpu_to_le32(0x00000028);
+ msg[1] = cpu_to_le32(0x00000038);
+ msg[2] = cpu_to_le32(0x00000001);
+ msg[3] = cpu_to_le32(0x00000000);
+ msg[4] = cpu_to_le32(handle);
+ msg[5] = cpu_to_le32(0x00000000);
+ msg[6] = cpu_to_le32(0x00000001);
+ msg[7] = cpu_to_le32(0x00000028);
+ msg[8] = cpu_to_le32(0x00000010);
+ msg[9] = cpu_to_le32(0x00000000);
+ msg[10] = cpu_to_le32(0x00000007);
+ msg[11] = cpu_to_le32(0x00000000);
+ msg[12] = cpu_to_le32(0x00000780);
+ msg[13] = cpu_to_le32(0x00000440);
+ for (i = 14; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return 0;
+}
+
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *msg;
+ int r, i;
+
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
+ if (r)
+ return r;
+
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+ msg[0] = cpu_to_le32(0x00000028);
+ msg[1] = cpu_to_le32(0x00000018);
+ msg[2] = cpu_to_le32(0x00000000);
+ msg[3] = cpu_to_le32(0x00000002);
+ msg[4] = cpu_to_le32(handle);
+ msg[5] = cpu_to_le32(0x00000000);
+ for (i = 6; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return 0;
+}
+
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
+ if (r)
+ goto error;
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
+static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
+ uint32_t ib_pack_in_dw, bool enc)
+{
+ uint32_t *ib_checksum;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
+ ib->ptr[ib->length_dw++] = 0x30000002;
+ ib_checksum = &ib->ptr[ib->length_dw++];
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
+ ib->ptr[ib->length_dw++] = 0x30000001;
+ ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
+
+ return ib_checksum;
+}
+
+static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
+ uint32_t ib_pack_in_dw)
+{
+ uint32_t i;
+ uint32_t checksum = 0;
+
+ for (i = 0; i < ib_pack_in_dw; i++)
+ checksum += *(*ib_checksum + 2 + i);
+
+ **ib_checksum = checksum;
+}
+
+static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ unsigned int ib_size_dw = 64;
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
+ uint32_t *ib_checksum;
+ uint32_t ib_pack_in_dw;
+ int i, r;
+
+ if (sq)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
+ ib->length_dw = 0;
+
+ /* single queue headers */
+ if (sq) {
+ ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ + 4 + 2; /* engine info + decoding ib in dw */
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
+ }
+
+ ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
+ ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
+ ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
+ memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
+
+ decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
+ decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
+ decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+
+ amdgpu_ib_free(adev, ib_msg, f);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+err:
+ amdgpu_ib_free(adev, ib_msg, f);
+ return r;
+}
+
+int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
+ if (r)
+ goto error;
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
+ uint64_t addr;
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
+ int i, r;
+
+ if (sq)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+
+ ib->length_dw = 0;
+
+ if (sq)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x0000000b;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
+ uint64_t addr;
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
+ int i, r;
+
+ if (sq)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+
+ ib->length_dw = 0;
+
+ if (sq)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x0000000b;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002;
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ amdgpu_ib_free(adev, &ib, fence);
+ dma_fence_put(fence);
+
+ return r;
+}
+
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ long r;
+
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) {
+ r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+ if (r)
+ goto error;
+ }
+
+ r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
+
+error:
+ return r;
+}
+
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
+{
+ int i;
+ unsigned int idx;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ break;
+ }
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3))
+ break;
+ }
+ dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
+ }
+}
+
+/*
+ * debugfs for mapping vcn firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_vcn_inst *vcn;
+ void *log_buf;
+ volatile struct amdgpu_vcn_fwlog *plog;
+ unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+ unsigned int read_num[2] = {0};
+
+ vcn = file_inode(f)->i_private;
+ if (!vcn)
+ return -ENODEV;
+
+ if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
+ return -EFAULT;
+
+ log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+
+ plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
+ read_pos = plog->rptr;
+ write_pos = plog->wptr;
+
+ if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
+ return -EFAULT;
+
+ if (!size || (read_pos == write_pos))
+ return 0;
+
+ if (write_pos > read_pos) {
+ available = write_pos - read_pos;
+ read_num[0] = min(size, (size_t)available);
+ } else {
+ read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
+ available = read_num[0] + write_pos - plog->header_size;
+ if (size > available)
+ read_num[1] = write_pos - plog->header_size;
+ else if (size > read_num[0])
+ read_num[1] = size - read_num[0];
+ else
+ read_num[0] = size;
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (read_num[i]) {
+ if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
+ read_pos = plog->header_size;
+ if (read_num[i] == copy_to_user((buf + read_bytes),
+ (log_buf + read_pos), read_num[i]))
+ return -EFAULT;
+
+ read_bytes += read_num[i];
+ read_pos += read_num[i];
+ }
+ }
+
+ plog->rptr = read_pos;
+ *pos += read_bytes;
+ return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_vcn_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
+ struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_vcn_%d_fwlog", i);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
+ &amdgpu_debugfs_vcnfwlog_fops,
+ AMDGPU_VCNFW_LOG_SIZE);
+#endif
+}
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
+ void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+ uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
+ volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ + vcn->fw_shared.log_offset;
+ *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
+ fw_log->is_enabled = 1;
+ fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
+ fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
+ fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
+
+ log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
+ log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->vcn.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for VCN!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i) ||
+ !adev->vcn.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_vcn_ras *ras;
+
+ if (!adev->vcn.ras)
+ return 0;
+
+ ras = adev->vcn.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register vcn ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "vcn");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->vcn.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
+
+ return 0;
+}
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = (ucode_id ? ucode_id :
+ (inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
+ AMDGPU_UCODE_ID_VCN0_RAM)),
+ .mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
new file mode 100644
index 000000000..a3eed90b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -0,0 +1,429 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCN_H__
+#define __AMDGPU_VCN_H__
+
+#include "amdgpu_ras.h"
+
+#define AMDGPU_VCN_STACK_SIZE (128*1024)
+#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
+
+#define AMDGPU_VCN_FIRMWARE_OFFSET 256
+#define AMDGPU_VCN_MAX_ENC_RINGS 3
+
+#define AMDGPU_MAX_VCN_INSTANCES 4
+#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
+
+#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
+#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
+
+#define VCN_DEC_KMD_CMD 0x80000000
+#define VCN_DEC_CMD_FENCE 0x00000000
+#define VCN_DEC_CMD_TRAP 0x00000001
+#define VCN_DEC_CMD_WRITE_REG 0x00000004
+#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006
+#define VCN_DEC_CMD_PACKET_START 0x0000000a
+#define VCN_DEC_CMD_PACKET_END 0x0000000b
+
+#define VCN_DEC_SW_CMD_NO_OP 0x00000000
+#define VCN_DEC_SW_CMD_END 0x00000001
+#define VCN_DEC_SW_CMD_IB 0x00000002
+#define VCN_DEC_SW_CMD_FENCE 0x00000003
+#define VCN_DEC_SW_CMD_TRAP 0x00000004
+#define VCN_DEC_SW_CMD_IB_AUTO 0x00000005
+#define VCN_DEC_SW_CMD_SEMAPHORE 0x00000006
+#define VCN_DEC_SW_CMD_PREEMPT_FENCE 0x00000009
+#define VCN_DEC_SW_CMD_REG_WRITE 0x0000000b
+#define VCN_DEC_SW_CMD_REG_WAIT 0x0000000c
+
+#define VCN_ENC_CMD_NO_OP 0x00000000
+#define VCN_ENC_CMD_END 0x00000001
+#define VCN_ENC_CMD_IB 0x00000002
+#define VCN_ENC_CMD_FENCE 0x00000003
+#define VCN_ENC_CMD_TRAP 0x00000004
+#define VCN_ENC_CMD_REG_WRITE 0x0000000b
+#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+
+#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+#define VCN_VID_IP_ADDRESS_2_0 0x0
+#define VCN_AON_IP_ADDRESS_2_0 0x30000
+
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \
+ do { \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ uint32_t internal_reg_offset, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
+#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4)
+#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
+#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
+#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
+#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
+#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
+#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+
+#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
+#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
+
+#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0)
+#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1)
+#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
+#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1)
+
+#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+
+enum fw_queue_mode {
+ FW_QUEUE_RING_RESET = 1,
+ FW_QUEUE_DPG_HOLD_OFF = 2,
+};
+
+enum engine_status_constants {
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+ UVD_STATUS__UVD_BUSY = 0x00000004,
+ GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+ UVD_STATUS__IDLE = 0x2,
+ UVD_STATUS__BUSY = 0x5,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+ UVD_STATUS__RBC_BUSY = 0x1,
+ UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0,
+};
+
+enum internal_dpg_state {
+ VCN_DPG_STATE__UNPAUSE = 0,
+ VCN_DPG_STATE__PAUSE,
+};
+
+struct dpg_pause_state {
+ enum internal_dpg_state fw_based;
+ enum internal_dpg_state jpeg;
+};
+
+struct amdgpu_vcn_reg{
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned nop;
+ unsigned context_id;
+ unsigned ib_vmid;
+ unsigned ib_bar_low;
+ unsigned ib_bar_high;
+ unsigned ib_size;
+ unsigned gp_scratch8;
+ unsigned scratch9;
+};
+
+struct amdgpu_vcn_fw_shared {
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
+struct amdgpu_vcn_inst {
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ void *saved_bo;
+ struct amdgpu_ring ring_dec;
+ struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+ atomic_t sched_score;
+ struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
+ struct amdgpu_vcn_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
+ atomic_t dpg_enc_submission_cnt;
+ struct amdgpu_vcn_fw_shared fw_shared;
+ uint8_t aid_id;
+};
+
+struct amdgpu_vcn_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_vcn {
+ unsigned fw_version;
+ struct delayed_work idle_work;
+ const struct firmware *fw; /* VCN firmware */
+ unsigned num_enc_rings;
+ enum amd_powergating_state cur_state;
+ bool indirect_sram;
+
+ uint8_t num_vcn_inst;
+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
+ uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
+ uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn_pg_lock;
+ struct mutex vcn1_jpeg1_workaround;
+ atomic_t total_submission_cnt;
+
+ unsigned harvest_config;
+ int (*pause_dpg_mode)(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+
+ struct ras_common_if *ras_if;
+ struct amdgpu_vcn_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+};
+
+struct amdgpu_fw_shared_rb_ptrs_struct {
+ /* to WA DPG R/W ptr issues.*/
+ uint32_t rptr;
+ uint32_t wptr;
+};
+
+struct amdgpu_fw_shared_multi_queue {
+ uint8_t decode_queue_mode;
+ uint8_t encode_generalpurpose_queue_mode;
+ uint8_t encode_lowlatency_queue_mode;
+ uint8_t encode_realtime_queue_mode;
+ uint8_t padding[4];
+};
+
+struct amdgpu_fw_shared_sw_ring {
+ uint8_t is_enabled;
+ uint8_t padding[3];
+};
+
+struct amdgpu_fw_shared_unified_queue_struct {
+ uint8_t is_enabled;
+ uint8_t queue_mode;
+ uint8_t queue_status;
+ uint8_t padding[5];
+};
+
+struct amdgpu_fw_shared_fw_logging {
+ uint8_t is_enabled;
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t size;
+};
+
+struct amdgpu_fw_shared_smu_interface_info {
+ uint8_t smu_interface_type;
+ uint8_t padding[3];
+};
+
+struct amdgpu_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[44];
+ struct amdgpu_fw_shared_rb_ptrs_struct rb;
+ uint8_t pad1[1];
+ struct amdgpu_fw_shared_multi_queue multi_queue;
+ struct amdgpu_fw_shared_sw_ring sw_ring;
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
+};
+
+struct amdgpu_fw_shared_rb_setup {
+ uint32_t is_rb_enabled_flags;
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+ uint32_t rb4_addr_lo;
+ uint32_t rb4_addr_hi;
+ uint32_t rb4_size;
+ uint32_t reserved[6];
+};
+
+struct amdgpu_fw_shared_drm_key_wa {
+ uint8_t method;
+ uint8_t reserved[3];
+};
+
+struct amdgpu_vcn4_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+};
+
+struct amdgpu_vcn_fwlog {
+ uint32_t rptr;
+ uint32_t wptr;
+ uint32_t buffer_size;
+ uint32_t header_size;
+ uint8_t wrapped;
+};
+
+struct amdgpu_vcn_decode_buffer {
+ uint32_t valid_buf_flag;
+ uint32_t msg_buffer_address_hi;
+ uint32_t msg_buffer_address_lo;
+ uint32_t pad[30];
+};
+
+#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
+#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
+#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
+
+enum vcn_ring_type {
+ VCN_ENCODE_RING,
+ VCN_DECODE_RING,
+ VCN_UNIFIED_RING,
+};
+
+int amdgpu_vcn_early_init(struct amdgpu_device *adev);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev);
+int amdgpu_vcn_resume(struct amdgpu_device *adev);
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev,
+ enum vcn_ring_type type, uint32_t vcn_instance);
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
+ uint8_t i, struct amdgpu_vcn_inst *vcn);
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
new file mode 100644
index 000000000..f9d3d79f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vf_error.h"
+#include "mxgpu_ai.h"
+
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+ uint16_t sub_error_code,
+ uint16_t error_flags,
+ uint64_t error_data)
+{
+ int index;
+ uint16_t error_code;
+
+ if (!amdgpu_sriov_vf(adev))
+ return;
+
+ error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
+
+ mutex_lock(&adev->virt.vf_errors.lock);
+ index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+ adev->virt.vf_errors.code [index] = error_code;
+ adev->virt.vf_errors.flags [index] = error_flags;
+ adev->virt.vf_errors.data [index] = error_data;
+ adev->virt.vf_errors.write_count ++;
+ mutex_unlock(&adev->virt.vf_errors.lock);
+}
+
+
+void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
+{
+ /* u32 pf2vf_flags = 0; */
+ u32 data1, data2, data3;
+ int index;
+
+ if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) ||
+ (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) {
+ return;
+ }
+/*
+ TODO: Enable these code when pv2vf_info is merged
+ AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags);
+ if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) {
+ return;
+ }
+*/
+
+ mutex_lock(&adev->virt.vf_errors.lock);
+ /* The errors are overlay of array, correct read_count as full. */
+ if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+ adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+ }
+
+ while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
+ index =adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+ data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
+ adev->virt.vf_errors.flags[index]);
+ data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
+ data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+
+ adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
+ adev->virt.vf_errors.read_count ++;
+ }
+ mutex_unlock(&adev->virt.vf_errors.lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
new file mode 100644
index 000000000..6436bd053
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VF_ERROR_H__
+#define __VF_ERROR_H__
+
+#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f) (((c & 0xFFFF) << 16) | (f & 0xFFFF))
+#define AMDGIM_ERROR_CODE(t,c) (((t&0xF)<<12)|(c&0xFFF))
+
+/* Please keep enum same as AMD GIM driver */
+enum AMDGIM_ERROR_VF {
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL = 0,
+ AMDGIM_ERROR_VF_NO_VBIOS,
+ AMDGIM_ERROR_VF_GPU_POST_ERROR,
+ AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL,
+ AMDGIM_ERROR_VF_FENCE_INIT_FAIL,
+
+ AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL,
+ AMDGIM_ERROR_VF_IB_INIT_FAIL,
+ AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL,
+ AMDGIM_ERROR_VF_ASIC_RESUME_FAIL,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL,
+
+ AMDGIM_ERROR_VF_TEST,
+ AMDGIM_ERROR_VF_MAX
+};
+
+enum AMDGIM_ERROR_CATEGORY {
+ AMDGIM_ERROR_CATEGORY_NON_USED = 0,
+ AMDGIM_ERROR_CATEGORY_GIM,
+ AMDGIM_ERROR_CATEGORY_PF,
+ AMDGIM_ERROR_CATEGORY_VF,
+ AMDGIM_ERROR_CATEGORY_VBIOS,
+ AMDGIM_ERROR_CATEGORY_MONITOR,
+
+ AMDGIM_ERROR_CATEGORY_MAX
+};
+
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+ uint16_t sub_error_code,
+ uint16_t error_flags,
+ uint64_t error_data);
+void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
+
+#endif /* __VF_ERROR_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
new file mode 100644
index 000000000..96857ae7f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -0,0 +1,1095 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
+#include <drm/drm_drv.h>
+#include <xen/xen.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "vi.h"
+#include "soc15.h"
+#include "nv.h"
+
+#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
+ do { \
+ vf2pf_info->ucode_info[ucode].id = ucode; \
+ vf2pf_info->ucode_info[ucode].version = ver; \
+ } while (0)
+
+bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
+{
+ /* By now all MMIO pages except mailbox are blocked */
+ /* if blocking is enabled in hypervisor. Choose the */
+ /* SCRATCH_REG0 to test. */
+ return RREG32_NO_KIQ(0xc040) == 0xffffffff;
+}
+
+void amdgpu_virt_init_setting(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ /* enable virtual display */
+ if (adev->asic_type != CHIP_ALDEBARAN &&
+ adev->asic_type != CHIP_ARCTURUS &&
+ ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
+ if (adev->mode_info.num_crtc == 0)
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ }
+ ddev->driver_features &= ~DRIVER_ATOMIC;
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+
+ /* Reduce kcq number to 2 to reduce latency */
+ if (amdgpu_num_kcq == -1)
+ amdgpu_num_kcq = 2;
+}
+
+void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+
+ if (adev->mes.ring.sched.ready) {
+ amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+ ref, mask);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+ ref, mask);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for IRQ context */
+ if (r < 1 && in_interrupt())
+ goto failed_kiq;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+ dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+}
+
+/**
+ * amdgpu_virt_request_full_gpu() - request full gpu access
+ * @adev: amdgpu device.
+ * @init: is driver init time.
+ * When start to init/fini driver, first need to request full gpu access.
+ * Return: Zero if request success, otherwise will return error.
+ */
+int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->req_full_gpu) {
+ r = virt->ops->req_full_gpu(adev, init);
+ if (r)
+ return r;
+
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_virt_release_full_gpu() - release full gpu access
+ * @adev: amdgpu device.
+ * @init: is driver init time.
+ * When finishing driver init/fini, need to release full gpu access.
+ * Return: Zero if release success, otherwise will returen error.
+ */
+int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->rel_full_gpu) {
+ r = virt->ops->rel_full_gpu(adev, init);
+ if (r)
+ return r;
+
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_virt_reset_gpu() - reset gpu
+ * @adev: amdgpu device.
+ * Send reset command to GPU hypervisor to reset GPU that VM is using
+ * Return: Zero if reset success, otherwise will return error.
+ */
+int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->reset_gpu) {
+ r = virt->ops->reset_gpu(adev);
+ if (r)
+ return r;
+
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+
+ return 0;
+}
+
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_init_data)
+ virt->ops->req_init_data(adev);
+
+ if (adev->virt.req_init_data_ver > 0)
+ DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ else
+ DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+}
+
+/**
+ * amdgpu_virt_wait_reset() - wait for reset gpu completed
+ * @adev: amdgpu device.
+ * Wait for GPU reset completed.
+ * Return: Zero if reset success, otherwise will return error.
+ */
+int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->wait_reset)
+ return -EINVAL;
+
+ return virt->ops->wait_reset(adev);
+}
+
+/**
+ * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
+ * @adev: amdgpu device.
+ * MM table is used by UVD and VCE for its initialization
+ * Return: Zero if allocate success.
+ */
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->virt.mm_table.bo,
+ &adev->virt.mm_table.gpu_addr,
+ (void *)&adev->virt.mm_table.cpu_addr);
+ if (r) {
+ DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ return r;
+ }
+
+ memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
+ DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ adev->virt.mm_table.gpu_addr,
+ adev->virt.mm_table.cpu_addr);
+ return 0;
+}
+
+/**
+ * amdgpu_virt_free_mm_table() - free mm table memory
+ * @adev: amdgpu device.
+ * Free MM table memory
+ */
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
+ &adev->virt.mm_table.gpu_addr,
+ (void *)&adev->virt.mm_table.cpu_addr);
+ adev->virt.mm_table.gpu_addr = 0;
+}
+
+
+unsigned int amd_sriov_msg_checksum(void *obj,
+ unsigned long obj_size,
+ unsigned int key,
+ unsigned int checksum)
+{
+ unsigned int ret = key;
+ unsigned long i = 0;
+ unsigned char *pos;
+
+ pos = (char *)obj;
+ /* calculate checksum */
+ for (i = 0; i < obj_size; ++i)
+ ret += *(pos + i);
+ /* minus the checksum itself */
+ pos = (char *)&checksum;
+ for (i = 0; i < sizeof(checksum); ++i)
+ ret -= *(pos + i);
+ return ret;
+}
+
+static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
+ /* GPU will be marked bad on host if bp count more then 10,
+ * so alloc 512 is enough.
+ */
+ unsigned int align_space = 512;
+ void *bps = NULL;
+ struct amdgpu_bo **bps_bo = NULL;
+
+ *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
+ if (!*data)
+ goto data_failure;
+
+ bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL);
+ if (!bps)
+ goto bps_failure;
+
+ bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);
+ if (!bps_bo)
+ goto bps_bo_failure;
+
+ (*data)->bps = bps;
+ (*data)->bps_bo = bps_bo;
+ (*data)->count = 0;
+ (*data)->last_reserved = 0;
+
+ virt->ras_init_done = true;
+
+ return 0;
+
+bps_bo_failure:
+ kfree(bps);
+bps_failure:
+ kfree(*data);
+data_failure:
+ return -ENOMEM;
+}
+
+static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_bo *bo;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved - 1; i >= 0; i--) {
+ bo = data->bps_bo[i];
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ data->last_reserved = i;
+ }
+}
+
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ virt->ras_init_done = false;
+
+ if (!data)
+ return;
+
+ amdgpu_virt_ras_release_bp(adev);
+
+ kfree(data->bps);
+ kfree(data->bps_bo);
+ kfree(data);
+ virt->virt_eh_data = NULL;
+}
+
+static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ if (!data)
+ return;
+
+ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+ data->count += pages;
+}
+
+static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_bo *bo = NULL;
+ uint64_t bp;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved; i < data->count; i++) {
+ bp = data->bps[i].retired_page;
+
+ /* There are two cases of reserve error should be ignored:
+ * 1) a ras bad page has been allocated (used by someone);
+ * 2) a ras bad page has been reserved (duplicate error injection
+ * for one page);
+ */
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ &bo, NULL))
+ DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+
+ data->bps_bo[i] = bo;
+ data->last_reserved = i + 1;
+ bo = NULL;
+ }
+}
+
+static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t retired_page)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ int i;
+
+ if (!data)
+ return true;
+
+ for (i = 0; i < data->count; i++)
+ if (retired_page == data->bps[i].retired_page)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
+ uint64_t bp_block_offset, uint32_t bp_block_size)
+{
+ struct eeprom_table_record bp;
+ uint64_t retired_page;
+ uint32_t bp_idx, bp_cnt;
+ void *vram_usage_va = NULL;
+
+ if (adev->mman.fw_vram_usage_va)
+ vram_usage_va = adev->mman.fw_vram_usage_va;
+ else
+ vram_usage_va = adev->mman.drv_vram_usage_va;
+
+ if (bp_block_size) {
+ bp_cnt = bp_block_size / sizeof(uint64_t);
+ for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
+ retired_page = *(uint64_t *)(vram_usage_va +
+ bp_block_offset + bp_idx * sizeof(uint64_t));
+ bp.retired_page = retired_page;
+
+ if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
+ continue;
+
+ amdgpu_virt_ras_add_bps(adev, &bp, 1);
+
+ amdgpu_virt_ras_reserve_bps(adev);
+ }
+ }
+}
+
+static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf;
+ uint32_t checksum;
+ uint32_t checkval;
+
+ uint32_t i;
+ uint32_t tmp;
+
+ if (adev->virt.fw_reserve.p_pf2vf == NULL)
+ return -EINVAL;
+
+ if (pf2vf_info->size > 1024) {
+ DRM_ERROR("invalid pf2vf message size\n");
+ return -EINVAL;
+ }
+
+ switch (pf2vf_info->version) {
+ case 1:
+ checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum;
+ checkval = amd_sriov_msg_checksum(
+ adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+ adev->virt.fw_reserve.checksum_key, checksum);
+ if (checksum != checkval) {
+ DRM_ERROR("invalid pf2vf message\n");
+ return -EINVAL;
+ }
+
+ adev->virt.gim_feature =
+ ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags;
+ break;
+ case 2:
+ /* TODO: missing key, need to add it later */
+ checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum;
+ checkval = amd_sriov_msg_checksum(
+ adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+ 0, checksum);
+ if (checksum != checkval) {
+ DRM_ERROR("invalid pf2vf message\n");
+ return -EINVAL;
+ }
+
+ adev->virt.vf2pf_update_interval_ms =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
+ adev->virt.gim_feature =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
+ adev->virt.reg_access =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all;
+
+ adev->virt.decode_max_dimension_pixels = 0;
+ adev->virt.decode_max_frame_pixels = 0;
+ adev->virt.encode_max_dimension_pixels = 0;
+ adev->virt.encode_max_frame_pixels = 0;
+ adev->virt.is_mm_bw_enabled = false;
+ for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) {
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels;
+ adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels;
+ adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels;
+ adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
+ adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
+ }
+ if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
+ adev->virt.is_mm_bw_enabled = true;
+
+ adev->unique_id =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ break;
+ default:
+ DRM_ERROR("invalid pf2vf version\n");
+ return -EINVAL;
+ }
+
+ /* correct too large or too little interval value */
+ if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000)
+ adev->virt.vf2pf_update_interval_ms = 2000;
+
+ return 0;
+}
+
+static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+ vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+ if (adev->virt.fw_reserve.p_vf2pf == NULL)
+ return;
+
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
+ adev->psp.asd_context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version);
+}
+
+static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+
+ vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+ if (adev->virt.fw_reserve.p_vf2pf == NULL)
+ return -EINVAL;
+
+ memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info));
+
+ vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info);
+ vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER;
+
+#ifdef MODULE
+ if (THIS_MODULE->version != NULL)
+ strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
+ else
+#endif
+ strcpy(vf2pf_info->driver_version, "N/A");
+
+ vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
+ vf2pf_info->driver_cert = 0;
+ vf2pf_info->os_info.all = 0;
+
+ vf2pf_info->fb_usage =
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
+ vf2pf_info->fb_vis_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
+ vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
+ vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
+
+ amdgpu_virt_populate_vf2pf_ucode_info(adev);
+
+ /* TODO: read dynamic info */
+ vf2pf_info->gfx_usage = 0;
+ vf2pf_info->compute_usage = 0;
+ vf2pf_info->encode_usage = 0;
+ vf2pf_info->decode_usage = 0;
+
+ vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ vf2pf_info->checksum =
+ amd_sriov_msg_checksum(
+ vf2pf_info, vf2pf_info->header.size, 0, 0);
+
+ return 0;
+}
+
+static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work);
+ int ret;
+
+ ret = amdgpu_virt_read_pf2vf_data(adev);
+ if (ret)
+ goto out;
+ amdgpu_virt_write_vf2pf_data(adev);
+
+out:
+ schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+}
+
+void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
+{
+ if (adev->virt.vf2pf_update_interval_ms != 0) {
+ DRM_INFO("clean up the vf2pf work item\n");
+ cancel_delayed_work_sync(&adev->virt.vf2pf_work);
+ adev->virt.vf2pf_update_interval_ms = 0;
+ }
+}
+
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+{
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ adev->virt.fw_reserve.p_vf2pf = NULL;
+ adev->virt.vf2pf_update_interval_ms = 0;
+
+ if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+ DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+ } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ /* go through this logic in ip_init and reset to init workqueue*/
+ amdgpu_virt_exchange_data(adev);
+
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
+}
+
+
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+ uint64_t bp_block_offset = 0;
+ uint32_t bp_block_size = 0;
+ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+ if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ if (adev->mman.fw_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ } else if (adev->mman.drv_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ }
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ amdgpu_virt_write_vf2pf_data(adev);
+
+ /* bad page handling for version 2 */
+ if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
+ pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
+
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
+
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
+ }
+}
+
+void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA20:
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ case CHIP_IP_DISCOVERY:
+ reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
+ break;
+ default: /* other chip doesn't support SRIOV */
+ reg = 0;
+ break;
+ }
+
+ if (reg & 1)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+ if (reg & 0x80000000)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+ /* passthrough mode exclus sriov mod */
+ if (is_virtual_machine() && !xen_initial_domain())
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+
+ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
+ /* we have the ability to check now */
+ if (amdgpu_sriov_vf(adev)) {
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ vi_set_virt_ops(adev);
+ break;
+ case CHIP_VEGA10:
+ soc15_set_virt_ops(adev);
+#ifdef CONFIG_X86
+ /* not send GPU_INIT_DATA with MS_HYPERV*/
+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
+#endif
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
+ break;
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ soc15_set_virt_ops(adev);
+ break;
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_IP_DISCOVERY:
+ nv_set_virt_ops(adev);
+ /* try send GPU_INIT_DATA request to host */
+ amdgpu_virt_request_init_data(adev);
+ break;
+ default: /* other chip doesn't support SRIOV */
+ DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ break;
+ }
+ }
+}
+
+static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_debug(adev) ? true : false;
+}
+
+static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_normal(adev) ? true : false;
+}
+
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_virt_access_debugfs_is_kiq(adev))
+ return 0;
+
+ if (amdgpu_virt_access_debugfs_is_mmio(adev))
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ else
+ return -EPERM;
+
+ return 0;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
+{
+ enum amdgpu_sriov_vf_mode mode;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ mode = SRIOV_VF_MODE_ONE_VF;
+ else
+ mode = SRIOV_VF_MODE_MULTI_VF;
+ } else {
+ mode = SRIOV_VF_MODE_BARE_METAL;
+ }
+
+ return mode;
+}
+
+void amdgpu_virt_post_reset(struct amdgpu_device *adev)
+{
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
+ /* force set to GFXOFF state after reset,
+ * to avoid some invalid operation before GC enable
+ */
+ adev->gfx.is_poweron = false;
+ }
+}
+
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
+{
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ /* no vf autoload, white list */
+ if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
+ ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
+ if (ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ case IP_VERSION(13, 0, 10):
+ /* white list */
+ if (ucode_id == AMDGPU_UCODE_ID_CAP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA
+ || ucode_id == AMDGPU_UCODE_ID_VCN1
+ || ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ default:
+ /* lagacy black list */
+ if (ucode_id == AMDGPU_UCODE_ID_SDMA0
+ || ucode_id == AMDGPU_UCODE_ID_SDMA1
+ || ucode_id == AMDGPU_UCODE_ID_SDMA2
+ || ucode_id == AMDGPU_UCODE_ID_SDMA3
+ || ucode_id == AMDGPU_UCODE_ID_SDMA4
+ || ucode_id == AMDGPU_UCODE_ID_SDMA5
+ || ucode_id == AMDGPU_UCODE_ID_SDMA6
+ || ucode_id == AMDGPU_UCODE_ID_SDMA7
+ || ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ }
+}
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
+{
+ uint32_t i;
+
+ if (!adev->virt.is_mm_bw_enabled)
+ return;
+
+ if (encode) {
+ for (i = 0; i < encode_array_size; i++) {
+ encode[i].max_width = adev->virt.encode_max_dimension_pixels;
+ encode[i].max_pixels_per_frame = adev->virt.encode_max_frame_pixels;
+ if (encode[i].max_width > 0)
+ encode[i].max_height = encode[i].max_pixels_per_frame / encode[i].max_width;
+ else
+ encode[i].max_height = 0;
+ }
+ }
+
+ if (decode) {
+ for (i = 0; i < decode_array_size; i++) {
+ decode[i].max_width = adev->virt.decode_max_dimension_pixels;
+ decode[i].max_pixels_per_frame = adev->virt.decode_max_frame_pixels;
+ if (decode[i].max_width > 0)
+ decode[i].max_height = decode[i].max_pixels_per_frame / decode[i].max_width;
+ else
+ decode[i].max_height = 0;
+ }
+ }
+}
+
+static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag)
+{
+ bool ret = false;
+
+ switch (hwip) {
+ case GC_HWIP:
+ if (amdgpu_sriov_reg_indirect_gc(adev)) {
+ *rlcg_flag =
+ write ? AMDGPU_RLCG_GC_WRITE : AMDGPU_RLCG_GC_READ;
+ ret = true;
+ /* only in new version, AMDGPU_REGS_NO_KIQ and
+ * AMDGPU_REGS_RLC are enabled simultaneously */
+ } else if ((acc_flags & AMDGPU_REGS_RLC) &&
+ !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
+ *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
+ ret = true;
+ }
+ break;
+ case MMHUB_HWIP:
+ if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
+ (acc_flags & AMDGPU_REGS_RLC) && write) {
+ *rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
+ ret = true;
+ }
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+ uint32_t timeout = 50000;
+ uint32_t i, tmp;
+ uint32_t ret = 0;
+ void *scratch_reg0;
+ void *scratch_reg1;
+ void *scratch_reg2;
+ void *scratch_reg3;
+ void *spare_int;
+
+ if (!adev->gfx.rlc.rlcg_reg_access_supported) {
+ dev_err(adev->dev,
+ "indirect registers access through rlcg is not available\n");
+ return 0;
+ }
+
+ if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+ dev_err(adev->dev, "invalid xcc\n");
+ return 0;
+ }
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
+ scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
+ scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
+ scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
+ scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
+ if (reg_access_ctrl->spare_int)
+ spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
+
+ if (offset == reg_access_ctrl->grbm_cntl) {
+ /* if the target reg offset is grbm_cntl, write to scratch_reg2 */
+ writel(v, scratch_reg2);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else if (offset == reg_access_ctrl->grbm_idx) {
+ /* if the target reg offset is grbm_idx, write to scratch_reg3 */
+ writel(v, scratch_reg3);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else {
+ /*
+ * SCRATCH_REG0 = read/write value
+ * SCRATCH_REG1[30:28] = command
+ * SCRATCH_REG1[19:0] = address in dword
+ * SCRATCH_REG1[26:24] = Error reporting
+ */
+ writel(v, scratch_reg0);
+ writel((offset | flag), scratch_reg1);
+ if (reg_access_ctrl->spare_int)
+ writel(1, spare_int);
+
+ for (i = 0; i < timeout; i++) {
+ tmp = readl(scratch_reg1);
+ if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
+ break;
+ udelay(10);
+ }
+
+ if (i >= timeout) {
+ if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
+ if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
+ dev_err(adev->dev,
+ "vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
+ dev_err(adev->dev,
+ "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
+ dev_err(adev->dev,
+ "register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+ } else {
+ dev_err(adev->dev,
+ "unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
+ }
+ } else {
+ dev_err(adev->dev,
+ "timeout: rlcg faled to program reg: 0x%05x\n", offset);
+ }
+ }
+ }
+
+ ret = readl(scratch_reg0);
+ return ret;
+}
+
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
+ return;
+ }
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ WREG32_NO_KIQ(offset, value);
+ else
+ WREG32(offset, value);
+}
+
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
+ return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ return RREG32_NO_KIQ(offset);
+ else
+ return RREG32(offset);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
new file mode 100644
index 000000000..fabb83e9d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Monk.liu@amd.com
+ */
+#ifndef AMDGPU_VIRT_H
+#define AMDGPU_VIRT_H
+
+#include "amdgv_sriovmsg.h"
+
+#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS (1 << 0) /* vBIOS is sr-iov ready */
+#define AMDGPU_SRIOV_CAPS_ENABLE_IOV (1 << 1) /* sr-iov is enabled on this GPU */
+#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
+#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */
+#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
+
+/* flags for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
+#define AMDGPU_RLCG_GC_WRITE (0x0 << 28)
+#define AMDGPU_RLCG_GC_READ (0x1 << 28)
+#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28)
+
+/* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000
+#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000
+#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
+
+#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+
+/* all asic after AI use this offset */
+#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
+/* tonga/fiji use this offset */
+#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+
+enum amdgpu_sriov_vf_mode {
+ SRIOV_VF_MODE_BARE_METAL = 0,
+ SRIOV_VF_MODE_ONE_VF,
+ SRIOV_VF_MODE_MULTI_VF,
+};
+
+struct amdgpu_mm_table {
+ struct amdgpu_bo *bo;
+ uint32_t *cpu_addr;
+ uint64_t gpu_addr;
+};
+
+#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
+
+/* struct error_entry - amdgpu VF error information. */
+struct amdgpu_vf_error_buffer {
+ struct mutex lock;
+ int read_count;
+ int write_count;
+ uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+ uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+ uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+};
+
+enum idh_request;
+
+/**
+ * struct amdgpu_virt_ops - amdgpu device virt operations
+ */
+struct amdgpu_virt_ops {
+ int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
+ int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
+ int (*req_init_data)(struct amdgpu_device *adev);
+ int (*reset_gpu)(struct amdgpu_device *adev);
+ int (*wait_reset)(struct amdgpu_device *adev);
+ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
+ u32 data1, u32 data2, u32 data3);
+ void (*ras_poison_handler)(struct amdgpu_device *adev);
+};
+
+/*
+ * Firmware Reserve Frame buffer
+ */
+struct amdgpu_virt_fw_reserve {
+ struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
+ struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ unsigned int checksum_key;
+};
+
+/*
+ * Legacy GIM header
+ *
+ * Defination between PF and VF
+ * Structures forcibly aligned to 4 to keep the same style as PF.
+ */
+#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024)
+
+#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \
+ (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2))
+
+enum AMDGIM_FEATURE_FLAG {
+ /* GIM supports feature of Error log collecting */
+ AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1,
+ /* GIM supports feature of loading uCodes */
+ AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
+ /* VRAM LOST by GIM */
+ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
+ /* MM bandwidth */
+ AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8,
+ /* PP ONE VF MODE in GIM */
+ AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
+ /* Indirect Reg Access enabled */
+ AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+ /* AV1 Support MODE*/
+ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+};
+
+enum AMDGIM_REG_ACCESS_FLAG {
+ /* Use PSP to program IH_RB_CNTL */
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ /* Use RLC to program MMHUB regs */
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ /* Use RLC to program GC regs */
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+};
+
+struct amdgim_pf2vf_info_v1 {
+ /* header contains size and version */
+ struct amd_sriov_msg_pf2vf_info_header header;
+ /* max_width * max_height */
+ unsigned int uvd_enc_max_pixels_count;
+ /* 16x16 pixels/sec, codec independent */
+ unsigned int uvd_enc_max_bandwidth;
+ /* max_width * max_height */
+ unsigned int vce_enc_max_pixels_count;
+ /* 16x16 pixels/sec, codec independent */
+ unsigned int vce_enc_max_bandwidth;
+ /* MEC FW position in kb from the start of visible frame buffer */
+ unsigned int mecfw_kboffset;
+ /* The features flags of the GIM driver supports. */
+ unsigned int feature_flags;
+ /* use private key from mailbox 2 to create chueksum */
+ unsigned int checksum;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v1 {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ /* driver version */
+ char driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ unsigned int driver_cert;
+ /* guest OS type and version: need a define */
+ unsigned int os_info;
+ /* in the unit of 1M */
+ unsigned int fb_usage;
+ /* guest gfx engine usage percentage */
+ unsigned int gfx_usage;
+ /* guest gfx engine health percentage */
+ unsigned int gfx_health;
+ /* guest compute engine usage percentage */
+ unsigned int compute_usage;
+ /* guest compute engine health percentage */
+ unsigned int compute_health;
+ /* guest vce engine usage percentage. 0xffff means N/A. */
+ unsigned int vce_enc_usage;
+ /* guest vce engine health percentage. 0xffff means N/A. */
+ unsigned int vce_enc_health;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ unsigned int uvd_enc_usage;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ unsigned int uvd_enc_health;
+ unsigned int checksum;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v2 {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ uint32_t checksum;
+ /* driver version */
+ uint8_t driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ uint32_t driver_cert;
+ /* guest OS type and version: need a define */
+ uint32_t os_info;
+ /* in the unit of 1M */
+ uint32_t fb_usage;
+ /* guest gfx engine usage percentage */
+ uint32_t gfx_usage;
+ /* guest gfx engine health percentage */
+ uint32_t gfx_health;
+ /* guest compute engine usage percentage */
+ uint32_t compute_usage;
+ /* guest compute engine health percentage */
+ uint32_t compute_health;
+ /* guest vce engine usage percentage. 0xffff means N/A. */
+ uint32_t vce_enc_usage;
+ /* guest vce engine health percentage. 0xffff means N/A. */
+ uint32_t vce_enc_health;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ uint32_t uvd_enc_usage;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ uint32_t uvd_enc_health;
+ uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
+} __aligned(4);
+
+struct amdgpu_virt_ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* point to reserved bo array */
+ struct amdgpu_bo **bps_bo;
+ /* the count of entries */
+ int count;
+ /* last reserved entry's index + 1 */
+ int last_reserved;
+};
+
+/* GPU virtualization */
+struct amdgpu_virt {
+ uint32_t caps;
+ struct amdgpu_bo *csa_obj;
+ void *csa_cpu_addr;
+ bool chained_ib_support;
+ uint32_t reg_val_offs;
+ struct amdgpu_irq_src ack_irq;
+ struct amdgpu_irq_src rcv_irq;
+ struct work_struct flr_work;
+ struct amdgpu_mm_table mm_table;
+ const struct amdgpu_virt_ops *ops;
+ struct amdgpu_vf_error_buffer vf_errors;
+ struct amdgpu_virt_fw_reserve fw_reserve;
+ uint32_t gim_feature;
+ uint32_t reg_access_mode;
+ int req_init_data_ver;
+ bool tdr_debug;
+ struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
+ bool ras_init_done;
+ uint32_t reg_access;
+
+ /* vf2pf message */
+ struct delayed_work vf2pf_work;
+ uint32_t vf2pf_update_interval_ms;
+
+ /* multimedia bandwidth config */
+ bool is_mm_bw_enabled;
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+
+ /* the ucode id to signal the autoload */
+ uint32_t autoload_ucode_id;
+};
+
+struct amdgpu_video_codec_info;
+
+#define amdgpu_sriov_enabled(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
+
+#define amdgpu_sriov_vf(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF)
+
+#define amdgpu_sriov_bios(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)
+
+#define amdgpu_sriov_runtime(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)
+
+#define amdgpu_sriov_fullaccess(adev) \
+(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
+
+#define amdgpu_sriov_reg_indirect_en(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))
+
+#define amdgpu_sriov_reg_indirect_ih(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))
+
+#define amdgpu_sriov_reg_indirect_mmhub(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_gc(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+
+#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
+ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
+
+#define amdgpu_passthrough(adev) \
+((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
+
+#define amdgpu_sriov_vf_mmio_access_protection(adev) \
+((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+
+static inline bool is_virtual_machine(void)
+{
+#if defined(CONFIG_X86)
+ return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#elif defined(CONFIG_ARM64)
+ return !is_kernel_in_hyp_mode();
+#else
+ return false;
+#endif
+}
+
+#define amdgpu_sriov_is_pp_one_vf(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+ ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
+#define amdgpu_sriov_is_normal(adev) \
+ ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
+void amdgpu_virt_init_setting(struct amdgpu_device *adev);
+void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t rreg1,
+ uint32_t ref, uint32_t mask);
+int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
+int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
+int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
+void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
+void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id);
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
+ uint32_t ucode_id);
+void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
new file mode 100644
index 000000000..db6fc0cb1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "dce_v6_0.h"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#include "dce_v8_0.h"
+#endif
+#include "dce_v10_0.h"
+#include "dce_v11_0.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+#include "amdgpu_vkms.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "amdgpu_irq.h"
+
+/**
+ * DOC: amdgpu_vkms
+ *
+ * The amdgpu vkms interface provides a virtual KMS interface for several use
+ * cases: devices without display hardware, platforms where the actual display
+ * hardware is not useful (e.g., servers), SR-IOV virtual functions, device
+ * emulation/simulation, and device bring up prior to display hardware being
+ * usable. We previously emulated a legacy KMS interface, but there was a desire
+ * to move to the atomic KMS interface. The vkms driver did everything we
+ * needed, but we wanted KMS support natively in the driver without buffer
+ * sharing and the ability to support an instance of VKMS per device. We first
+ * looked at splitting vkms into a stub driver and a helper module that other
+ * drivers could use to implement a virtual display, but this strategy ended up
+ * being messy due to driver specific callbacks needed for buffer management.
+ * Ultimately, it proved easier to import the vkms code as it mostly used core
+ * drm helpers anyway.
+ */
+
+static const u32 amdgpu_vkms_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
+{
+ struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer);
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ u64 ret_overrun;
+ bool ret;
+
+ ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
+ output->period_ns);
+ if (ret_overrun != 1)
+ DRM_WARN("%s: vblank timer overrun\n", __func__);
+
+ ret = drm_crtc_handle_vblank(crtc);
+ /* Don't queue timer again when vblank is disabled. */
+ if (!ret)
+ return HRTIMER_NORESTART;
+
+ return HRTIMER_RESTART;
+}
+
+static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = drm_crtc_index(crtc);
+ struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_calc_timestamping_constants(crtc, &crtc->mode);
+
+ out->period_ns = ktime_set(0, vblank->framedur_ns);
+ hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
+}
+
+static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
+ int *max_error,
+ ktime_t *vblank_time,
+ bool in_vblank_irq)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!READ_ONCE(vblank->enabled)) {
+ *vblank_time = ktime_get();
+ return true;
+ }
+
+ *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires);
+
+ if (WARN_ON(*vblank_time == vblank->time))
+ return true;
+
+ /*
+ * To prevent races we roll the hrtimer forward before we do any
+ * interrupt processing - this is how real hw works (the interrupt is
+ * only generated after all the vblank registers are updated) and what
+ * the vblank core expects. Therefore we need to always correct the
+ * timestampe by one frame.
+ */
+ *vblank_time -= output->period_ns;
+
+ return true;
+}
+
+static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = {
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = drm_crtc_cleanup,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = amdgpu_vkms_enable_vblank,
+ .disable_vblank = amdgpu_vkms_disable_vblank,
+ .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp,
+};
+
+static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_on(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_off(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ unsigned long flags;
+ if (crtc->state->event) {
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ else
+ drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ crtc->state->event = NULL;
+ }
+}
+
+static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = {
+ .atomic_flush = amdgpu_vkms_crtc_atomic_flush,
+ .atomic_enable = amdgpu_vkms_crtc_atomic_enable,
+ .atomic_disable = amdgpu_vkms_crtc_atomic_disable,
+};
+
+static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+ struct drm_plane *primary, struct drm_plane *cursor)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ int ret;
+
+ ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor,
+ &amdgpu_vkms_crtc_funcs, NULL);
+ if (ret) {
+ DRM_ERROR("Failed to init CRTC\n");
+ return ret;
+ }
+
+ drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs);
+
+ amdgpu_crtc->crtc_id = drm_crtc_index(crtc);
+ adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc;
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
+
+ hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate;
+
+ return ret;
+}
+
+static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct drm_display_mode *mode = NULL;
+ unsigned i;
+ static const struct mode_size {
+ int w;
+ int h;
+ } common_modes[] = {
+ { 640, 480},
+ { 720, 480},
+ { 800, 600},
+ { 848, 480},
+ {1024, 768},
+ {1152, 768},
+ {1280, 720},
+ {1280, 800},
+ {1280, 854},
+ {1280, 960},
+ {1280, 1024},
+ {1440, 900},
+ {1400, 1050},
+ {1680, 1050},
+ {1600, 1200},
+ {1920, 1080},
+ {1920, 1200},
+ {2560, 1440},
+ {4096, 3112},
+ {3656, 2664},
+ {3840, 2160},
+ {4096, 2160},
+ };
+
+ for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
+ mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ continue;
+ drm_mode_probed_add(connector, mode);
+ }
+
+ drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF);
+
+ return ARRAY_SIZE(common_modes);
+}
+
+static const struct drm_connector_helper_funcs amdgpu_vkms_conn_helper_funcs = {
+ .get_modes = amdgpu_vkms_conn_get_modes,
+};
+
+static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *old_state)
+{
+ return;
+}
+
+static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+ plane);
+ struct drm_crtc_state *crtc_state;
+ int ret;
+
+ if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc))
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state,
+ new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ false, true);
+ if (ret != 0)
+ return ret;
+
+ /* for now primary plane must be visible and full screen */
+ if (!new_plane_state->visible)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ uint32_t domain;
+ int r;
+
+ if (!new_state->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+ afb = to_amdgpu_framebuffer(new_state->fb);
+ obj = new_state->fb->obj[0];
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ else
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin;
+ }
+
+ amdgpu_bo_unreserve(rbo);
+
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ amdgpu_bo_ref(rbo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct amdgpu_bo *rbo;
+ int r;
+
+ if (!old_state->fb)
+ return;
+
+ rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+ r = amdgpu_bo_reserve(rbo, false);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve rbo before unpin\n");
+ return;
+ }
+
+ amdgpu_bo_unpin(rbo);
+ amdgpu_bo_unreserve(rbo);
+ amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_plane_helper_funcs amdgpu_vkms_primary_helper_funcs = {
+ .atomic_update = amdgpu_vkms_plane_atomic_update,
+ .atomic_check = amdgpu_vkms_plane_atomic_check,
+ .prepare_fb = amdgpu_vkms_prepare_fb,
+ .cleanup_fb = amdgpu_vkms_cleanup_fb,
+};
+
+static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
+ enum drm_plane_type type,
+ int index)
+{
+ struct drm_plane *plane;
+ int ret;
+
+ plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+ if (!plane)
+ return ERR_PTR(-ENOMEM);
+
+ ret = drm_universal_plane_init(dev, plane, 1 << index,
+ &amdgpu_vkms_plane_funcs,
+ amdgpu_vkms_formats,
+ ARRAY_SIZE(amdgpu_vkms_formats),
+ NULL, type, NULL);
+ if (ret) {
+ kfree(plane);
+ return ERR_PTR(ret);
+ }
+
+ drm_plane_helper_add(plane, &amdgpu_vkms_primary_helper_funcs);
+
+ return plane;
+}
+
+static int amdgpu_vkms_output_init(struct drm_device *dev, struct
+ amdgpu_vkms_output *output, int index)
+{
+ struct drm_connector *connector = &output->connector;
+ struct drm_encoder *encoder = &output->encoder;
+ struct drm_crtc *crtc = &output->crtc.base;
+ struct drm_plane *primary, *cursor = NULL;
+ int ret;
+
+ primary = amdgpu_vkms_plane_init(dev, DRM_PLANE_TYPE_PRIMARY, index);
+ if (IS_ERR(primary))
+ return PTR_ERR(primary);
+
+ ret = amdgpu_vkms_crtc_init(dev, crtc, primary, cursor);
+ if (ret)
+ goto err_crtc;
+
+ ret = drm_connector_init(dev, connector, &amdgpu_vkms_connector_funcs,
+ DRM_MODE_CONNECTOR_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init connector\n");
+ goto err_connector;
+ }
+
+ drm_connector_helper_add(connector, &amdgpu_vkms_conn_helper_funcs);
+
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init encoder\n");
+ goto err_encoder;
+ }
+ encoder->possible_crtcs = 1 << index;
+
+ ret = drm_connector_attach_encoder(connector, encoder);
+ if (ret) {
+ DRM_ERROR("Failed to attach connector to encoder\n");
+ goto err_attach;
+ }
+
+ drm_mode_config_reset(dev);
+
+ return 0;
+
+err_attach:
+ drm_encoder_cleanup(encoder);
+
+err_encoder:
+ drm_connector_cleanup(connector);
+
+err_connector:
+ drm_crtc_cleanup(crtc);
+
+err_crtc:
+ drm_plane_cleanup(primary);
+
+ return ret;
+}
+
+const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
+ .fb_create = amdgpu_display_user_framebuffer_create,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static int amdgpu_vkms_sw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
+ if (!adev->amdgpu_vkms_output)
+ return -ENOMEM;
+
+ adev_to_drm(adev)->max_vblank_count = 0;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.max_width = XRES_MAX;
+ adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ /* allocate crtcs, encoders, connectors */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i);
+ if (r)
+ return r;
+ }
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int amdgpu_vkms_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i = 0;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ if (adev->mode_info.crtcs[i])
+ hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+ drm_mode_config_cleanup(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = false;
+
+ kfree(adev->mode_info.bios_hardcoded_edid);
+ kfree(adev->amdgpu_vkms_output);
+ return 0;
+}
+
+static int amdgpu_vkms_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ dce_v6_0_disable_dce(adev);
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dce_v8_0_disable_dce(adev);
+ break;
+#endif
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ dce_v10_0_disable_dce(adev);
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_VEGAM:
+ dce_v11_0_disable_dce(adev);
+ break;
+ case CHIP_TOPAZ:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ /* no DCE */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_vkms_hw_fini(void *handle)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = drm_mode_config_helper_suspend(adev_to_drm(adev));
+ if (r)
+ return r;
+ return amdgpu_vkms_hw_fini(handle);
+}
+
+static int amdgpu_vkms_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_vkms_hw_init(handle);
+ if (r)
+ return r;
+ return drm_mode_config_helper_resume(adev_to_drm(adev));
+}
+
+static bool amdgpu_vkms_is_idle(void *handle)
+{
+ return true;
+}
+
+static int amdgpu_vkms_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
+ .name = "amdgpu_vkms",
+ .early_init = NULL,
+ .late_init = NULL,
+ .sw_init = amdgpu_vkms_sw_init,
+ .sw_fini = amdgpu_vkms_sw_fini,
+ .hw_init = amdgpu_vkms_hw_init,
+ .hw_fini = amdgpu_vkms_hw_fini,
+ .suspend = amdgpu_vkms_suspend,
+ .resume = amdgpu_vkms_resume,
+ .is_idle = amdgpu_vkms_is_idle,
+ .wait_for_idle = amdgpu_vkms_wait_for_idle,
+ .soft_reset = amdgpu_vkms_soft_reset,
+ .set_clockgating_state = amdgpu_vkms_set_clockgating_state,
+ .set_powergating_state = amdgpu_vkms_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version amdgpu_vkms_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &amdgpu_vkms_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
new file mode 100644
index 000000000..4f8722ff3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _AMDGPU_VKMS_H_
+#define _AMDGPU_VKMS_H_
+
+#define XRES_DEF 1024
+#define YRES_DEF 768
+
+#define XRES_MAX 16384
+#define YRES_MAX 16384
+
+#define drm_crtc_to_amdgpu_vkms_output(target) \
+ container_of(target, struct amdgpu_vkms_output, crtc.base)
+
+extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block;
+
+struct amdgpu_vkms_output {
+ struct amdgpu_crtc crtc;
+ struct drm_encoder encoder;
+ struct drm_connector connector;
+ ktime_t period_ns;
+ struct drm_pending_vblank_event *event;
+};
+
+#endif /* _AMDGPU_VKMS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
new file mode 100644
index 000000000..9fe1278fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -0,0 +1,2735 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <linux/dma-fence-array.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/idr.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
+#include "kfd_svm.h"
+
+/**
+ * DOC: GPUVM
+ *
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * VRAM pages and system pages (both memory and MMIO) and system pages
+ * can be mapped as snooped (cached system pages) or unsnooped
+ * (uncached system pages).
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
+ * buffer. VMIDs are allocated dynamically as commands are submitted.
+ * The userspace drivers maintain their own address space and the kernel
+ * sets up their pages tables accordingly when they submit their
+ * command buffers and a VMID is assigned.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If an GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
+ */
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
+ START, LAST, static, amdgpu_vm_it)
+
+#undef START
+#undef LAST
+
+/**
+ * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
+ */
+struct amdgpu_prt_cb {
+
+ /**
+ * @adev: amdgpu device
+ */
+ struct amdgpu_device *adev;
+
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
+
+/**
+ * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_struct {
+ /**
+ * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+ */
+ struct amdgpu_vm *vm;
+
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
+
+/**
+ * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
+ * @pasid: the pasid the VM is using on this GPU
+ *
+ * Set the pasid this VM is using on this GPU, can also be used to remove the
+ * pasid by passing in zero.
+ *
+ */
+int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ u32 pasid)
+{
+ int r;
+
+ if (vm->pasid == pasid)
+ return 0;
+
+ if (vm->pasid) {
+ r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
+ if (r < 0)
+ return r;
+
+ vm->pasid = 0;
+ }
+
+ if (pasid) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
+ GFP_KERNEL));
+ if (r < 0)
+ return r;
+
+ vm->pasid = pasid;
+ }
+
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+ struct amdgpu_vm *vm = vm_bo->vm;
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm->evicted);
+ else
+ list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM BOs which are moved, but that change is not yet reflected
+ * in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
+ vm_bo->moved = false;
+}
+
+/**
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
+ *
+ * @vm_bo: vm_bo which is now invalidated
+ *
+ * State for normal BOs which are invalidated and that change not yet reflected
+ * in the PTs.
+ */
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is reloacted
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which needs to update their parent PD.
+ * For the root PD, just move to idle state.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
+ amdgpu_vm_bo_idle(vm_bo);
+ }
+}
+
+/**
+ * amdgpu_vm_bo_done - vm_bo is done
+ *
+ * @vm_bo: vm_bo which is now done
+ *
+ * State for normal BOs which are invalidated and that change has been updated
+ * in the PTs.
+ */
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->done);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM which state machine to reset
+ *
+ * Move all vm_bo object in the VM into a state where they will be updated
+ * again during validation.
+ */
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_bo_base *vm_bo, *tmp;
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->done, &vm->invalidated);
+ list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+ vm_bo->moved = true;
+ list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ vm_bo->moved = true;
+ if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ else if (bo->parent)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ base->vm = vm;
+ base->bo = bo;
+ base->next = NULL;
+ INIT_LIST_HEAD(&base->vm_status);
+
+ if (!bo)
+ return;
+ base->next = bo->vm_bo;
+ bo->vm_bo = base;
+
+ if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ return;
+
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
+ ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
+ if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
+ amdgpu_vm_bo_relocated(base);
+ else
+ amdgpu_vm_bo_idle(base);
+
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
+ return;
+
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ amdgpu_vm_bo_evicted(base);
+}
+
+/**
+ * amdgpu_vm_lock_pd - lock PD in drm_exec
+ *
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the VM root PD in the DRM execution context.
+ */
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ /* We need at least two fences for the VM PD/PT updates */
+ return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
+ 2 + num_fences);
+}
+
+/**
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ *
+ * Move all BOs to the end of LRU and remember their positions to put them
+ * together.
+ */
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ spin_lock(&adev->mman.bdev.lru_lock);
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+ spin_unlock(&adev->mman.bdev.lru_lock);
+}
+
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ int r;
+
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error;
+
+ return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+
+error:
+ drm_sched_entity_destroy(&vm->immediate);
+ return r;
+}
+
+/* Destroy the entities for page table updates again */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+ drm_sched_entity_destroy(&vm->immediate);
+ drm_sched_entity_destroy(&vm->delayed);
+}
+
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+ if (!vm)
+ return result;
+
+ result += vm->generation;
+ /* Add one if the page tables will be re-generated on next CS */
+ if (drm_sched_entity_error(&vm->delayed))
+ ++result;
+
+ return result;
+}
+
+/**
+ * amdgpu_vm_validate_pt_bos - validate the page table BOs
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ * @validate: callback to do the validation
+ * @param: parameter for the validation callback
+ *
+ * Validate the page table BOs on command submission if neccessary.
+ *
+ * Returns:
+ * Validation result.
+ */
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int (*validate)(void *p, struct amdgpu_bo *bo),
+ void *param)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *shadow;
+ struct amdgpu_bo *bo;
+ int r;
+
+ if (drm_sched_entity_error(&vm->delayed)) {
+ ++vm->generation;
+ amdgpu_vm_bo_reset_state_machine(vm);
+ amdgpu_vm_fini_entities(vm);
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+ }
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ shadow = amdgpu_bo_shadowed(bo);
+
+ r = validate(param, bo);
+ if (r)
+ return r;
+ if (shadow) {
+ r = validate(param, shadow);
+ if (r)
+ return r;
+ }
+
+ if (bo->tbo.type != ttm_bo_type_kernel) {
+ amdgpu_vm_bo_moved(bo_base);
+ } else {
+ vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
+ amdgpu_vm_bo_relocated(bo_base);
+ }
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ amdgpu_vm_eviction_lock(vm);
+ vm->evicting = false;
+ amdgpu_vm_eviction_unlock(vm);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_ready - check VM is ready for updates
+ *
+ * @vm: VM to check
+ *
+ * Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+ * True if VM is not evicting.
+ */
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+{
+ bool empty;
+ bool ret;
+
+ amdgpu_vm_eviction_lock(vm);
+ ret = !vm->evicting;
+ amdgpu_vm_eviction_unlock(vm);
+
+ spin_lock(&vm->status_lock);
+ empty = list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
+
+ return ret && empty;
+}
+
+/**
+ * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
+ *
+ * @adev: amdgpu_device pointer
+ */
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
+{
+ const struct amdgpu_ip_block *ip_block;
+ bool has_compute_vm_bug;
+ struct amdgpu_ring *ring;
+ int i;
+
+ has_compute_vm_bug = false;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block) {
+ /* Compute has a VM bug for GFX version < 7.
+ Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
+ if (ip_block->version->major <= 7)
+ has_compute_vm_bug = true;
+ else if (ip_block->version->major == 8)
+ if (adev->gfx.mec_fw_version < 673)
+ has_compute_vm_bug = true;
+ }
+
+ for (i = 0; i < adev->num_rings; i++) {
+ ring = adev->rings[i];
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ /* only compute rings */
+ ring->has_compute_vm_bug = has_compute_vm_bug;
+ else
+ ring->has_compute_vm_bug = false;
+ }
+}
+
+/**
+ * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
+ *
+ * @ring: ring on which the job will be submitted
+ * @job: job to submit
+ *
+ * Returns:
+ * True if sync is needed.
+ */
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ if (job->vmid == 0)
+ return false;
+
+ if (job->vm_needs_flush || ring->has_compute_vm_bug)
+ return true;
+
+ if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+ return true;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_vm_flush - hardware flush the vm
+ *
+ * @ring: ring to use for flush
+ * @job: related job
+ * @need_pipe_sync: is pipe sync needed
+ *
+ * Emit a VM flush when it is necessary.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
+ bool spm_update_needed = job->spm_update_needed;
+ bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+ job->gds_switch_needed;
+ bool vm_flush_needed = job->vm_needs_flush;
+ struct dma_fence *fence = NULL;
+ bool pasid_mapping_needed = false;
+ unsigned patch_offset = 0;
+ int r;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, id)) {
+ gds_switch_needed = true;
+ vm_flush_needed = true;
+ pasid_mapping_needed = true;
+ spm_update_needed = true;
+ }
+
+ mutex_lock(&id_mgr->lock);
+ if (id->pasid != job->pasid || !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping))
+ pasid_mapping_needed = true;
+ mutex_unlock(&id_mgr->lock);
+
+ gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+ vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
+ job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
+ pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+ ring->funcs->emit_wreg;
+
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ return 0;
+
+ amdgpu_ring_ib_begin(ring);
+ if (ring->funcs->init_cond_exec)
+ patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+ if (need_pipe_sync)
+ amdgpu_ring_emit_pipeline_sync(ring);
+
+ if (vm_flush_needed) {
+ trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+ amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+ }
+
+ if (pasid_mapping_needed)
+ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
+
+ if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+
+ if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+ gds_switch_needed) {
+ amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+ job->gds_size, job->gws_base,
+ job->gws_size, job->oa_base,
+ job->oa_size);
+ }
+
+ if (vm_flush_needed || pasid_mapping_needed) {
+ r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+ if (r)
+ return r;
+ }
+
+ if (vm_flush_needed) {
+ mutex_lock(&id_mgr->lock);
+ dma_fence_put(id->last_flush);
+ id->last_flush = dma_fence_get(fence);
+ id->current_gpu_reset_count =
+ atomic_read(&adev->gpu_reset_counter);
+ mutex_unlock(&id_mgr->lock);
+ }
+
+ if (pasid_mapping_needed) {
+ mutex_lock(&id_mgr->lock);
+ id->pasid = job->pasid;
+ dma_fence_put(id->pasid_mapping);
+ id->pasid_mapping = dma_fence_get(fence);
+ mutex_unlock(&id_mgr->lock);
+ }
+ dma_fence_put(fence);
+
+ if (ring->funcs->patch_cond_exec)
+ amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
+ /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+ if (ring->funcs->emit_switch_buffer) {
+ amdgpu_ring_emit_switch_buffer(ring);
+ amdgpu_ring_emit_switch_buffer(ring);
+ }
+ amdgpu_ring_ib_end(ring);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
+ *
+ * @vm: requested vm
+ * @bo: requested buffer object
+ *
+ * Find @bo inside the requested vm.
+ * Search inside the @bos vm list for the requested vm
+ * Returns the found bo_va or NULL if none is found
+ *
+ * Object has to be reserved!
+ *
+ * Returns:
+ * Found bo_va or NULL.
+ */
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next) {
+ if (base->vm != vm)
+ continue;
+
+ return container_of(base, struct amdgpu_bo_va, base);
+ }
+ return NULL;
+}
+
+/**
+ * amdgpu_vm_map_gart - Resolve gart mapping of addr
+ *
+ * @pages_addr: optional DMA address to use for lookup
+ * @addr: the unmapped addr
+ *
+ * Look up the physical address of the page that the pte resolves
+ * to.
+ *
+ * Returns:
+ * The pointer for the page table entry.
+ */
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
+{
+ uint64_t result;
+
+ /* page table offset */
+ result = pages_addr[addr >> PAGE_SHIFT];
+
+ /* in case cpu page size != gpu page size*/
+ result |= addr & (~PAGE_MASK);
+
+ result &= 0xFFFFFFFFFFFFF000ULL;
+
+ return result;
+}
+
+/**
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @immediate: submit immediately to the paging queue
+ *
+ * Makes sure all directories are up to date.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool immediate)
+{
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_bo_base *entry;
+ bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
+ int r, idx;
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
+ return 0;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ if (r)
+ goto error;
+
+ list_for_each_entry(entry, &relocated, vm_status) {
+ /* vm_flush_needed after updating moved PDEs */
+ flush_tlb_needed |= entry->moved;
+
+ r = amdgpu_vm_pde_update(&params, entry);
+ if (r)
+ goto error;
+ }
+
+ r = vm->update_funcs->commit(&params, &vm->last_update);
+ if (r)
+ goto error;
+
+ if (flush_tlb_needed)
+ atomic64_inc(&vm->tlb_seq);
+
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
+ vm_status);
+ amdgpu_vm_bo_idle(entry);
+ }
+
+error:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
+{
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+
+ tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+ atomic64_inc(&tlb_cb->vm->tlb_seq);
+ kfree(tlb_cb);
+}
+
+/**
+ * amdgpu_vm_update_range - update a range in the vm page table
+ *
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
+ * @immediate: immediate submission in a page fault
+ * @unlocked: unlocked invalidation during MM callback
+ * @flush_tlb: trigger tlb invalidation after update completed
+ * @resv: fences we need to sync to
+ * @start: start of mapped range
+ * @last: last mapped entry
+ * @flags: flags for the entries
+ * @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
+ * @res: ttm_resource to map
+ * @pages_addr: DMA addresses to use for mapping
+ * @fence: optional resulting fence
+ *
+ * Fill in the page table entries between @start and @last.
+ *
+ * Returns:
+ * 0 for success, negative erro code for failure.
+ */
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
+{
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+ struct amdgpu_res_cursor cursor;
+ enum amdgpu_sync_mode sync_mode;
+ int r, idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+ if (!tlb_cb) {
+ r = -ENOMEM;
+ goto error_unlock;
+ }
+
+ /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
+ * heavy-weight flush TLB unconditionally.
+ */
+ flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
+
+ /*
+ * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+ */
+ flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+ params.pages_addr = pages_addr;
+ params.unlocked = unlocked;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping. Sync to moving fences before mapping.
+ */
+ if (!(flags & AMDGPU_PTE_VALID))
+ sync_mode = AMDGPU_SYNC_EQ_OWNER;
+ else
+ sync_mode = AMDGPU_SYNC_EXPLICIT;
+
+ amdgpu_vm_eviction_lock(vm);
+ if (vm->evicting) {
+ r = -EBUSY;
+ goto error_free;
+ }
+
+ if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
+ struct dma_fence *tmp = dma_fence_get_stub();
+
+ amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
+ swap(vm->last_unlocked, tmp);
+ dma_fence_put(tmp);
+ }
+
+ r = vm->update_funcs->prepare(&params, resv, sync_mode);
+ if (r)
+ goto error_free;
+
+ amdgpu_res_first(pages_addr ? NULL : res, offset,
+ (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
+ while (cursor.remaining) {
+ uint64_t tmp, num_entries, addr;
+
+ num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
+ if (pages_addr) {
+ bool contiguous = true;
+
+ if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+ uint64_t pfn = cursor.start >> PAGE_SHIFT;
+ uint64_t count;
+
+ contiguous = pages_addr[pfn + 1] ==
+ pages_addr[pfn] + PAGE_SIZE;
+
+ tmp = num_entries /
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ for (count = 2; count < tmp; ++count) {
+ uint64_t idx = pfn + count;
+
+ if (contiguous != (pages_addr[idx] ==
+ pages_addr[idx - 1] + PAGE_SIZE))
+ break;
+ }
+ if (!contiguous)
+ count--;
+ num_entries = count *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ }
+
+ if (!contiguous) {
+ addr = cursor.start;
+ params.pages_addr = pages_addr;
+ } else {
+ addr = pages_addr[cursor.start >> PAGE_SHIFT];
+ params.pages_addr = NULL;
+ }
+
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
+ addr = vram_base + cursor.start;
+ } else {
+ addr = 0;
+ }
+
+ tmp = start + num_entries;
+ r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
+ if (r)
+ goto error_free;
+
+ amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
+ start = tmp;
+ }
+
+ r = vm->update_funcs->commit(&params, fence);
+
+ if (flush_tlb || params.table_freed) {
+ tlb_cb->vm = vm;
+ if (fence && *fence &&
+ !dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
+ } else {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ }
+ tlb_cb = NULL;
+ }
+
+error_free:
+ kfree(tlb_cb);
+
+error_unlock:
+ amdgpu_vm_eviction_unlock(vm);
+ drm_dev_exit(idx);
+ return r;
+}
+
+static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_mem_stats *stats)
+{
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+
+ if (!bo)
+ return;
+
+ /*
+ * For now ignore BOs which are currently locked and potentially
+ * changing their location.
+ */
+ if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
+ !dma_resv_trylock(bo->tbo.base.resv))
+ return;
+
+ amdgpu_bo_get_memory(bo, stats);
+ if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+}
+
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats *stats)
+{
+ struct amdgpu_bo_va *bo_va, *tmp;
+
+ spin_lock(&vm->status_lock);
+ list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+
+ list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
+ amdgpu_vm_bo_get_memory(bo_va, stats);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update - update all BO mappings in the vm page table
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: requested BO and VM object
+ * @clear: if true clear the entries
+ *
+ * Fill in the page table entries for @bo_va.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
+ */
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
+ bool clear)
+{
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ dma_addr_t *pages_addr = NULL;
+ struct ttm_resource *mem;
+ struct dma_fence **last_update;
+ bool flush_tlb = clear;
+ struct dma_resv *resv;
+ uint64_t vram_base;
+ uint64_t flags;
+ int r;
+
+ if (clear || !bo) {
+ mem = NULL;
+ resv = vm->root.bo->tbo.base.resv;
+ } else {
+ struct drm_gem_object *obj = &bo->tbo.base;
+
+ resv = bo->tbo.base.resv;
+ if (obj->import_attach && bo_va->is_xgmi) {
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
+ struct drm_gem_object *gobj = dma_buf->priv;
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+ if (abo->tbo.resource &&
+ abo->tbo.resource->mem_type == TTM_PL_VRAM)
+ bo = gem_to_amdgpu_bo(gobj);
+ }
+ mem = bo->tbo.resource;
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_PREEMPT))
+ pages_addr = bo->tbo.ttm->dma_address;
+ }
+
+ if (bo) {
+ struct amdgpu_device *bo_adev;
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+
+ if (amdgpu_bo_encrypted(bo))
+ flags |= AMDGPU_PTE_TMZ;
+
+ bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
+ } else {
+ flags = 0x0;
+ vram_base = 0;
+ }
+
+ if (clear || (bo && bo->tbo.base.resv ==
+ vm->root.bo->tbo.base.resv))
+ last_update = &vm->last_update;
+ else
+ last_update = &bo_va->last_pt_update;
+
+ if (!clear && bo_va->base.moved) {
+ flush_tlb = true;
+ list_splice_init(&bo_va->valids, &bo_va->invalids);
+
+ } else if (bo_va->cleared != clear) {
+ list_splice_init(&bo_va->valids, &bo_va->invalids);
+ }
+
+ list_for_each_entry(mapping, &bo_va->invalids, list) {
+ uint64_t update_flags = flags;
+
+ /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+ * but in case of something, we filter the flags in first place
+ */
+ if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ update_flags &= ~AMDGPU_PTE_READABLE;
+ if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ update_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+ /* Apply ASIC specific mapping flags */
+ amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+
+ trace_amdgpu_vm_bo_update(mapping);
+
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ resv, mapping->start, mapping->last,
+ update_flags, mapping->offset,
+ vram_base, mem, pages_addr,
+ last_update);
+ if (r)
+ return r;
+ }
+
+ /* If the BO is not in its preferred location add it back to
+ * the evicted list so that it gets validated again on the
+ * next command submission.
+ */
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+ uint32_t mem_type = bo->tbo.resource->mem_type;
+
+ if (!(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(mem_type)))
+ amdgpu_vm_bo_evicted(&bo_va->base);
+ else
+ amdgpu_vm_bo_idle(&bo_va->base);
+ } else {
+ amdgpu_vm_bo_done(&bo_va->base);
+ }
+
+ list_splice_init(&bo_va->invalids, &bo_va->valids);
+ bo_va->cleared = clear;
+ bo_va->base.moved = false;
+
+ if (trace_amdgpu_vm_bo_mapping_enabled()) {
+ list_for_each_entry(mapping, &bo_va->valids, list)
+ trace_amdgpu_vm_bo_mapping(mapping);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_update_prt_state - update the global PRT state
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
+{
+ unsigned long flags;
+ bool enable;
+
+ spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
+ enable = !!atomic_read(&adev->vm_manager.num_prt_users);
+ adev->gmc.gmc_funcs->set_prt(adev, enable);
+ spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
+}
+
+/**
+ * amdgpu_vm_prt_get - add a PRT user
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->set_prt)
+ return;
+
+ if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_put - drop a PRT user
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
+{
+ if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_cb - callback for updating the PRT status
+ *
+ * @fence: fence for the callback
+ * @_cb: the callback function
+ */
+static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
+{
+ struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
+
+ amdgpu_vm_prt_put(cb->adev);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
+ *
+ * @adev: amdgpu_device pointer
+ * @fence: fence for the callback
+ */
+static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
+ struct dma_fence *fence)
+{
+ struct amdgpu_prt_cb *cb;
+
+ if (!adev->gmc.gmc_funcs->set_prt)
+ return;
+
+ cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ if (fence)
+ dma_fence_wait(fence, false);
+
+ amdgpu_vm_prt_put(adev);
+ } else {
+ cb->adev = adev;
+ if (!fence || dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_vm_prt_cb))
+ amdgpu_vm_prt_cb(fence, &cb->cb);
+ }
+}
+
+/**
+ * amdgpu_vm_free_mapping - free a mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @mapping: mapping to be freed
+ * @fence: fence of the unmap operation
+ *
+ * Free a mapping and make sure we decrease the PRT usage count if applicable.
+ */
+static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va_mapping *mapping,
+ struct dma_fence *fence)
+{
+ if (mapping->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_add_prt_cb(adev, fence);
+ kfree(mapping);
+}
+
+/**
+ * amdgpu_vm_prt_fini - finish all prt mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Register a cleanup callback to disable PRT support after VM dies.
+ */
+static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ /* Add a callback for each fence in the reservation object */
+ amdgpu_vm_prt_get(adev);
+ amdgpu_vm_add_prt_cb(adev, fence);
+ }
+}
+
+/**
+ * amdgpu_vm_clear_freed - clear freed BOs in the PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
+ *
+ * Make sure all freed BOs are cleared in the PT.
+ * PTs have to be reserved and mutex must be locked!
+ *
+ * Returns:
+ * 0 for success.
+ *
+ */
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+ struct amdgpu_bo_va_mapping *mapping;
+ uint64_t init_pte_value = 0;
+ struct dma_fence *f = NULL;
+ int r;
+
+ while (!list_empty(&vm->freed)) {
+ mapping = list_first_entry(&vm->freed,
+ struct amdgpu_bo_va_mapping, list);
+ list_del(&mapping->list);
+
+ if (vm->pte_support_ats &&
+ mapping->start < AMDGPU_GMC_HOLE_START)
+ init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
+
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
+ mapping->start, mapping->last,
+ init_pte_value, 0, 0, NULL, NULL,
+ &f);
+ amdgpu_vm_free_mapping(adev, vm, mapping, f);
+ if (r) {
+ dma_fence_put(f);
+ return r;
+ }
+ }
+
+ if (fence && f) {
+ dma_fence_put(*fence);
+ *fence = f;
+ } else {
+ dma_fence_put(f);
+ }
+
+ return 0;
+
+}
+
+/**
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Make sure all BOs which are moved are updated in the PTs.
+ *
+ * Returns:
+ * 0 for success.
+ *
+ * PTs have to be reserved!
+ */
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct amdgpu_bo_va *bo_va;
+ struct dma_resv *resv;
+ bool clear;
+ int r;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ /* Per VM BOs never need to bo cleared in the page tables */
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+ spin_lock(&vm->status_lock);
+ }
+
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+ base.vm_status);
+ resv = bo_va->base.bo->tbo.base.resv;
+ spin_unlock(&vm->status_lock);
+
+ /* Try to reserve the BO to avoid clearing its ptes */
+ if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+ clear = false;
+ /* Somebody else is using the BO right now */
+ else
+ clear = true;
+
+ r = amdgpu_vm_bo_update(adev, bo_va, clear);
+ if (r)
+ return r;
+
+ if (!clear)
+ dma_resv_unlock(resv);
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_add - add a bo to a specific vm
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @bo: amdgpu buffer object
+ *
+ * Add @bo into the requested vm.
+ * Add @bo to the list of bos associated with the vm
+ *
+ * Returns:
+ * Newly added bo_va or NULL for failure
+ *
+ * Object has to be reserved!
+ */
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_bo_va *bo_va;
+
+ bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
+ if (bo_va == NULL) {
+ return NULL;
+ }
+ amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
+
+ bo_va->ref_count = 1;
+ bo_va->last_pt_update = dma_fence_get_stub();
+ INIT_LIST_HEAD(&bo_va->valids);
+ INIT_LIST_HEAD(&bo_va->invalids);
+
+ if (!bo)
+ return bo_va;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
+ bo_va->is_xgmi = true;
+ /* Power up XGMI if it can be potentially used */
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
+ }
+
+ return bo_va;
+}
+
+
+/**
+ * amdgpu_vm_bo_insert_map - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping)
+{
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+
+ mapping->bo_va = bo_va;
+ list_add(&mapping->list, &bo_va->invalids);
+ amdgpu_vm_it_insert(mapping, &vm->va);
+
+ if (mapping->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ !bo_va->base.moved) {
+ amdgpu_vm_bo_moved(&bo_va->base);
+ }
+ trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
+/**
+ * amdgpu_vm_bo_map - map bo inside a vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @saddr: where to map the BO
+ * @offset: requested offset in the BO
+ * @size: BO size in bytes
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add a mapping of the BO at the specefied addr into the VM.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr, uint64_t offset,
+ uint64_t size, uint64_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping, *tmp;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ uint64_t eaddr;
+
+ /* validate the parameters */
+ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+ return -EINVAL;
+ if (saddr + size <= saddr || offset + size <= offset)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ eaddr = saddr + size - 1;
+ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+ if (tmp) {
+ /* bo and tmp overlap, invalid addr */
+ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
+ "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
+ tmp->start, tmp->last + 1);
+ return -EINVAL;
+ }
+
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return -ENOMEM;
+
+ mapping->start = saddr;
+ mapping->last = eaddr;
+ mapping->offset = offset;
+ mapping->flags = flags;
+
+ amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @saddr: where to map the BO
+ * @offset: requested offset in the BO
+ * @size: BO size in bytes
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add a mapping of the BO at the specefied addr into the VM. Replace existing
+ * mappings as we do so.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr, uint64_t offset,
+ uint64_t size, uint64_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ uint64_t eaddr;
+ int r;
+
+ /* validate the parameters */
+ if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+ return -EINVAL;
+ if (saddr + size <= saddr || offset + size <= offset)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ eaddr = saddr + size - 1;
+ if ((bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+
+ /* Allocate all the needed memory */
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return -ENOMEM;
+
+ r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
+ if (r) {
+ kfree(mapping);
+ return r;
+ }
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ mapping->start = saddr;
+ mapping->last = eaddr;
+ mapping->offset = offset;
+ mapping->flags = flags;
+
+ amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_unmap - remove bo mapping from vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to remove the address from
+ * @saddr: where to the BO is mapped
+ *
+ * Remove a mapping of the BO at the specefied addr from the VM.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ bool valid = true;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ list_for_each_entry(mapping, &bo_va->valids, list) {
+ if (mapping->start == saddr)
+ break;
+ }
+
+ if (&mapping->list == &bo_va->valids) {
+ valid = false;
+
+ list_for_each_entry(mapping, &bo_va->invalids, list) {
+ if (mapping->start == saddr)
+ break;
+ }
+
+ if (&mapping->list == &bo_va->invalids)
+ return -ENOENT;
+ }
+
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ mapping->bo_va = NULL;
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+
+ if (valid)
+ list_add(&mapping->list, &vm->freed);
+ else
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM structure to use
+ * @saddr: start of the range
+ * @size: size of the range
+ *
+ * Remove all mappings in a range, split them as appropriate.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size)
+{
+ struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
+ LIST_HEAD(removed);
+ uint64_t eaddr;
+
+ eaddr = saddr + size - 1;
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ /* Allocate all the needed memory */
+ before = kzalloc(sizeof(*before), GFP_KERNEL);
+ if (!before)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&before->list);
+
+ after = kzalloc(sizeof(*after), GFP_KERNEL);
+ if (!after) {
+ kfree(before);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&after->list);
+
+ /* Now gather all removed mappings */
+ tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+ while (tmp) {
+ /* Remember mapping split at the start */
+ if (tmp->start < saddr) {
+ before->start = tmp->start;
+ before->last = saddr - 1;
+ before->offset = tmp->offset;
+ before->flags = tmp->flags;
+ before->bo_va = tmp->bo_va;
+ list_add(&before->list, &tmp->bo_va->invalids);
+ }
+
+ /* Remember mapping split at the end */
+ if (tmp->last > eaddr) {
+ after->start = eaddr + 1;
+ after->last = tmp->last;
+ after->offset = tmp->offset;
+ after->offset += (after->start - tmp->start) << PAGE_SHIFT;
+ after->flags = tmp->flags;
+ after->bo_va = tmp->bo_va;
+ list_add(&after->list, &tmp->bo_va->invalids);
+ }
+
+ list_del(&tmp->list);
+ list_add(&tmp->list, &removed);
+
+ tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
+ }
+
+ /* And free them up */
+ list_for_each_entry_safe(tmp, next, &removed, list) {
+ amdgpu_vm_it_remove(tmp, &vm->va);
+ list_del(&tmp->list);
+
+ if (tmp->start < saddr)
+ tmp->start = saddr;
+ if (tmp->last > eaddr)
+ tmp->last = eaddr;
+
+ tmp->bo_va = NULL;
+ list_add(&tmp->list, &vm->freed);
+ trace_amdgpu_vm_bo_unmap(NULL, tmp);
+ }
+
+ /* Insert partial mapping before the range */
+ if (!list_empty(&before->list)) {
+ struct amdgpu_bo *bo = before->bo_va->base.bo;
+
+ amdgpu_vm_it_insert(before, &vm->va);
+ if (before->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ !before->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&before->bo_va->base);
+ } else {
+ kfree(before);
+ }
+
+ /* Insert partial mapping after the range */
+ if (!list_empty(&after->list)) {
+ struct amdgpu_bo *bo = after->bo_va->base.bo;
+
+ amdgpu_vm_it_insert(after, &vm->va);
+ if (after->flags & AMDGPU_PTE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ !after->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&after->bo_va->base);
+ } else {
+ kfree(after);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_lookup_mapping - find mapping by address
+ *
+ * @vm: the requested VM
+ * @addr: the address
+ *
+ * Find a mapping by it's address.
+ *
+ * Returns:
+ * The amdgpu_bo_va_mapping matching for addr or NULL
+ *
+ */
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+ uint64_t addr)
+{
+ return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
+}
+
+/**
+ * amdgpu_vm_bo_trace_cs - trace all reserved mappings
+ *
+ * @vm: the requested vm
+ * @ticket: CS ticket
+ *
+ * Trace all mappings of BOs reserved during a command submission.
+ */
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+
+ if (!trace_amdgpu_vm_bo_cs_enabled())
+ return;
+
+ for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
+ mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
+ if (mapping->bo_va && mapping->bo_va->base.bo) {
+ struct amdgpu_bo *bo;
+
+ bo = mapping->bo_va->base.bo;
+ if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
+ ticket)
+ continue;
+ }
+
+ trace_amdgpu_vm_bo_cs(mapping);
+ }
+}
+
+/**
+ * amdgpu_vm_bo_del - remove a bo from a specific vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: requested bo_va
+ *
+ * Remove @bo_va->bo from the requested vm.
+ *
+ * Object have to be reserved!
+ */
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va)
+{
+ struct amdgpu_bo_va_mapping *mapping, *next;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_vm_bo_base **base;
+
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
+ if (bo) {
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+ ttm_bo_set_bulk_move(&bo->tbo, NULL);
+
+ for (base = &bo_va->base.bo->vm_bo; *base;
+ base = &(*base)->next) {
+ if (*base != &bo_va->base)
+ continue;
+
+ *base = bo_va->base.next;
+ break;
+ }
+ }
+
+ spin_lock(&vm->status_lock);
+ list_del(&bo_va->base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ mapping->bo_va = NULL;
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ list_add(&mapping->list, &vm->freed);
+ }
+ list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
+ }
+
+ dma_fence_put(bo_va->last_pt_update);
+
+ if (bo && bo_va->is_xgmi)
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
+
+ kfree(bo_va);
+}
+
+/**
+ * amdgpu_vm_evictable - check if we can evict a VM
+ *
+ * @bo: A page table of the VM.
+ *
+ * Check if it is possible to evict a VM.
+ */
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
+
+ /* Page tables of a destroyed VM can go away immediately */
+ if (!bo_base || !bo_base->vm)
+ return true;
+
+ /* Don't evict VM page tables while they are busy */
+ if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
+ return false;
+
+ /* Try to block ongoing updates */
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
+ return false;
+
+ /* Don't evict VM page tables while they are updated */
+ if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return false;
+ }
+
+ bo_base->vm->evicting = true;
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return true;
+}
+
+/**
+ * amdgpu_vm_bo_invalidate - mark the bo as invalid
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ * @evicted: is the BO evicted
+ *
+ * Mark @bo as invalid.
+ */
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo, bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ /* shadow bo doesn't have bo base, its validation needs its parent */
+ if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
+ bo = bo->parent;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+ amdgpu_vm_bo_evicted(bo_base);
+ continue;
+ }
+
+ if (bo_base->moved)
+ continue;
+ bo_base->moved = true;
+
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ amdgpu_vm_bo_relocated(bo_base);
+ else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+ amdgpu_vm_bo_moved(bo_base);
+ else
+ amdgpu_vm_bo_invalidated(bo_base);
+ }
+}
+
+/**
+ * amdgpu_vm_get_block_size - calculate VM page table size as power of two
+ *
+ * @vm_size: VM size
+ *
+ * Returns:
+ * VM page table as power of two
+ */
+static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
+{
+ /* Total bits covered by PD + PTs */
+ unsigned bits = ilog2(vm_size) + 18;
+
+ /* Make sure the PD is 4K in size up to 8GB address space.
+ Above that split equal between PD and PTs */
+ if (vm_size <= 8)
+ return (bits - 9);
+ else
+ return ((bits + 3) / 2);
+}
+
+/**
+ * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
+ *
+ * @adev: amdgpu_device pointer
+ * @min_vm_size: the minimum vm size in GB if it's set auto
+ * @fragment_size_default: Default PTE fragment size
+ * @max_level: max VMPT level
+ * @max_bits: max address space size in bits
+ *
+ */
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
+ uint32_t fragment_size_default, unsigned max_level,
+ unsigned max_bits)
+{
+ unsigned int max_size = 1 << (max_bits - 30);
+ unsigned int vm_size;
+ uint64_t tmp;
+
+ /* adjust vm size first */
+ if (amdgpu_vm_size != -1) {
+ vm_size = amdgpu_vm_size;
+ if (vm_size > max_size) {
+ dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
+ amdgpu_vm_size, max_size);
+ vm_size = max_size;
+ }
+ } else {
+ struct sysinfo si;
+ unsigned int phys_ram_gb;
+
+ /* Optimal VM size depends on the amount of physical
+ * RAM available. Underlying requirements and
+ * assumptions:
+ *
+ * - Need to map system memory and VRAM from all GPUs
+ * - VRAM from other GPUs not known here
+ * - Assume VRAM <= system memory
+ * - On GFX8 and older, VM space can be segmented for
+ * different MTYPEs
+ * - Need to allow room for fragmentation, guard pages etc.
+ *
+ * This adds up to a rough guess of system memory x3.
+ * Round up to power of two to maximize the available
+ * VM size with the given page table size.
+ */
+ si_meminfo(&si);
+ phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
+ (1 << 30) - 1) >> 30;
+ vm_size = roundup_pow_of_two(
+ min(max(phys_ram_gb * 3, min_vm_size), max_size));
+ }
+
+ adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
+
+ tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
+ if (amdgpu_vm_block_size != -1)
+ tmp >>= amdgpu_vm_block_size - 9;
+ tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
+ adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+ switch (adev->vm_manager.num_level) {
+ case 3:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB2;
+ break;
+ case 2:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB1;
+ break;
+ case 1:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB0;
+ break;
+ default:
+ dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
+ }
+ /* block size depends on vm size and hw setup*/
+ if (amdgpu_vm_block_size != -1)
+ adev->vm_manager.block_size =
+ min((unsigned)amdgpu_vm_block_size, max_bits
+ - AMDGPU_GPU_PAGE_SHIFT
+ - 9 * adev->vm_manager.num_level);
+ else if (adev->vm_manager.num_level > 1)
+ adev->vm_manager.block_size = 9;
+ else
+ adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
+
+ if (amdgpu_vm_fragment_size == -1)
+ adev->vm_manager.fragment_size = fragment_size_default;
+ else
+ adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
+
+ DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+ vm_size, adev->vm_manager.num_level + 1,
+ adev->vm_manager.block_size,
+ adev->vm_manager.fragment_size);
+}
+
+/**
+ * amdgpu_vm_wait_idle - wait for the VM to become idle
+ *
+ * @vm: VM object to wait for
+ * @timeout: timeout to wait for VM to become idle
+ */
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
+{
+ timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP,
+ true, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
+}
+
+/**
+ * amdgpu_vm_init - initialize a vm instance
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @xcp_id: GPU partition selection id
+ *
+ * Init @vm fields.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int32_t xcp_id)
+{
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_bo_vm *root;
+ int r, i;
+
+ vm->va = RB_ROOT_CACHED;
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+ vm->reserved_vmid[i] = NULL;
+ INIT_LIST_HEAD(&vm->evicted);
+ INIT_LIST_HEAD(&vm->relocated);
+ INIT_LIST_HEAD(&vm->moved);
+ INIT_LIST_HEAD(&vm->idle);
+ INIT_LIST_HEAD(&vm->invalidated);
+ spin_lock_init(&vm->status_lock);
+ INIT_LIST_HEAD(&vm->freed);
+ INIT_LIST_HEAD(&vm->done);
+ INIT_LIST_HEAD(&vm->pt_freed);
+ INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
+ INIT_KFIFO(vm->faults);
+
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+
+ vm->pte_support_ats = false;
+ vm->is_compute_context = false;
+
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_GFX);
+
+ DRM_DEBUG_DRIVER("VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+ if (vm->use_cpu_for_update)
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ else
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
+
+ vm->last_update = dma_fence_get_stub();
+ vm->last_unlocked = dma_fence_get_stub();
+ vm->last_tlb_flush = dma_fence_get_stub();
+ vm->generation = 0;
+
+ mutex_init(&vm->eviction_lock);
+ vm->evicting = false;
+
+ r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
+ false, &root, xcp_id);
+ if (r)
+ goto error_free_delayed;
+
+ root_bo = amdgpu_bo_ref(&root->bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root->shadow);
+ amdgpu_bo_unref(&root_bo);
+ goto error_free_delayed;
+ }
+
+ amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
+ if (r)
+ goto error_free_root;
+
+ r = amdgpu_vm_pt_clear(adev, vm, root, false);
+ if (r)
+ goto error_free_root;
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
+
+error_free_root:
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+error_free_delayed:
+ dma_fence_put(vm->last_tlb_flush);
+ dma_fence_put(vm->last_unlocked);
+ amdgpu_vm_fini_entities(vm);
+
+ return r;
+}
+
+/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+ int r;
+
+ r = amdgpu_bo_reserve(vm->root.bo, true);
+ if (r)
+ return r;
+
+ /* Check if PD needs to be reinitialized and do it before
+ * changing any other state, in case it fails.
+ */
+ if (pte_support_ats != vm->pte_support_ats) {
+ /* Sanity checks */
+ if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
+ r = -EINVAL;
+ goto unreserve_bo;
+ }
+
+ vm->pte_support_ats = pte_support_ats;
+ r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
+ false);
+ if (r)
+ goto unreserve_bo;
+ }
+
+ /* Update VM state */
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+ DRM_DEBUG_DRIVER("VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+ if (vm->use_cpu_for_update) {
+ /* Sync with last SDMA update/clear before switching to CPU */
+ r = amdgpu_bo_sync_wait(vm->root.bo,
+ AMDGPU_FENCE_OWNER_UNDEFINED, true);
+ if (r)
+ goto unreserve_bo;
+
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ r = amdgpu_vm_pt_map_tables(adev, vm);
+ if (r)
+ goto unreserve_bo;
+
+ } else {
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
+ }
+
+ dma_fence_put(vm->last_update);
+ vm->last_update = dma_fence_get_stub();
+ vm->is_compute_context = true;
+
+ /* Free the shadow bo for compute VM */
+ amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
+
+ goto unreserve_bo;
+
+unreserve_bo:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_release_compute - release a compute vm
+ * @adev: amdgpu_device pointer
+ * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
+ *
+ * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
+ * pasid from vm. Compute should stop use of vm after this call.
+ */
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ amdgpu_vm_set_pasid(adev, vm, 0);
+ vm->is_compute_context = false;
+}
+
+/**
+ * amdgpu_vm_fini - tear down a vm instance
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Tear down @vm.
+ * Unbind the VM and remove all bos from the vm bo list
+ */
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_bo_va_mapping *mapping, *tmp;
+ bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
+ struct amdgpu_bo *root;
+ unsigned long flags;
+ int i;
+
+ amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
+ flush_work(&vm->pt_free_work);
+
+ root = amdgpu_bo_ref(vm->root.bo);
+ amdgpu_bo_reserve(root, true);
+ amdgpu_vm_set_pasid(adev, vm, 0);
+ dma_fence_wait(vm->last_unlocked, false);
+ dma_fence_put(vm->last_unlocked);
+ dma_fence_wait(vm->last_tlb_flush, false);
+ /* Make sure that all fence callbacks have completed */
+ spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
+ spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_put(vm->last_tlb_flush);
+
+ list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
+ if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
+ amdgpu_vm_prt_fini(adev, vm);
+ prt_fini_needed = false;
+ }
+
+ list_del(&mapping->list);
+ amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
+ }
+
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(root);
+ amdgpu_bo_unref(&root);
+ WARN_ON(vm->root.bo);
+
+ amdgpu_vm_fini_entities(vm);
+
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
+ dev_err(adev->dev, "still active bo inside vm\n");
+ }
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
+ &vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
+ list_del(&mapping->list);
+ kfree(mapping);
+ }
+
+ dma_fence_put(vm->last_update);
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+ if (vm->reserved_vmid[i]) {
+ amdgpu_vmid_free_reserved(adev, i);
+ vm->reserved_vmid[i] = false;
+ }
+ }
+
+}
+
+/**
+ * amdgpu_vm_manager_init - init the VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Concurrent flushes are only possible starting with Vega10 and
+ * are broken on Navi10 and Navi14.
+ */
+ adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
+ adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14);
+ amdgpu_vmid_mgr_init(adev);
+
+ adev->vm_manager.fence_context =
+ dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+ adev->vm_manager.seqno[i] = 0;
+
+ spin_lock_init(&adev->vm_manager.prt_lock);
+ atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+ /* If not overridden by the user, by default, only in large BAR systems
+ * Compute VM tables will be updated by CPU
+ */
+#ifdef CONFIG_X86_64
+ if (amdgpu_vm_update_mode == -1) {
+ /* For asic with VF MMIO access protection
+ * avoid using CPU for VM table updates
+ */
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !amdgpu_sriov_vf_mmio_access_protection(adev))
+ adev->vm_manager.vm_update_mode =
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE;
+ else
+ adev->vm_manager.vm_update_mode = 0;
+ } else
+ adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
+#else
+ adev->vm_manager.vm_update_mode = 0;
+#endif
+
+ xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
+}
+
+/**
+ * amdgpu_vm_manager_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+{
+ WARN_ON(!xa_empty(&adev->vm_manager.pasids));
+ xa_destroy(&adev->vm_manager.pasids);
+
+ amdgpu_vmid_mgr_fini(adev);
+}
+
+/**
+ * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_vm
+ * @filp: drm file pointer
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ union drm_amdgpu_vm *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ /* No valid flags defined yet */
+ if (args->in.flags)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_VM_OP_RESERVE_VMID:
+ /* We only have requirement to reserve vmid from gfxhub */
+ if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+ fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
+ }
+
+ break;
+ case AMDGPU_VM_OP_UNRESERVE_VMID:
+ if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
+ fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_get_task_info - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ * @task_info: task_info to fill.
+ */
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
+ struct amdgpu_task_info *task_info)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm)
+ *task_info = vm->task_info;
+
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (vm->task_info.pid)
+ return;
+
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name, current->group_leader);
+}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
+ * @addr: Address of the fault
+ * @write_fault: true is write fault, false is read fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
+ u32 vmid, u32 node_id, uint64_t addr,
+ bool write_fault)
+{
+ bool is_compute_context = false;
+ struct amdgpu_bo *root;
+ unsigned long irqflags;
+ uint64_t value, flags;
+ struct amdgpu_vm *vm;
+ int r;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm) {
+ root = amdgpu_bo_ref(vm->root.bo);
+ is_compute_context = vm->is_compute_context;
+ } else {
+ root = NULL;
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+
+ if (!root)
+ return false;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, write_fault)) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
+
+ r = amdgpu_bo_reserve(root, true);
+ if (r)
+ goto error_unref;
+
+ /* Double check that the VM still exists */
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm && vm->root.bo != root)
+ vm = NULL;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+ if (!vm)
+ goto error_unlock;
+
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+ AMDGPU_PTE_SYSTEM;
+
+ if (is_compute_context) {
+ /* Intentionally setting invalid PTE flag
+ * combination to force a no-retry-fault
+ */
+ flags = AMDGPU_VM_NORETRY_FLAGS;
+ value = 0;
+ } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+ /* Redirect the access to the dummy page */
+ value = adev->dummy_page_addr;
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+ AMDGPU_PTE_WRITEABLE;
+
+ } else {
+ /* Let the hw retry silently on the PTE */
+ value = 0;
+ }
+
+ r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
+ if (r) {
+ pr_debug("failed %d to reserve fence slot\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
+ addr, flags, value, 0, NULL, NULL, NULL);
+ if (r)
+ goto error_unlock;
+
+ r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+ amdgpu_bo_unreserve(root);
+ if (r < 0)
+ DRM_ERROR("Can't handle page fault (%d)\n", r);
+
+error_unref:
+ amdgpu_bo_unref(&root);
+
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/**
+ * amdgpu_debugfs_vm_bo_info - print BO info for the VM
+ *
+ * @vm: Requested VM for printing BO info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file for the VM
+ */
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
+{
+ struct amdgpu_bo_va *bo_va, *tmp;
+ u64 total_idle = 0;
+ u64 total_evicted = 0;
+ u64 total_relocated = 0;
+ u64 total_moved = 0;
+ u64 total_invalidated = 0;
+ u64 total_done = 0;
+ unsigned int total_idle_objs = 0;
+ unsigned int total_evicted_objs = 0;
+ unsigned int total_relocated_objs = 0;
+ unsigned int total_moved_objs = 0;
+ unsigned int total_invalidated_objs = 0;
+ unsigned int total_done_objs = 0;
+ unsigned int id = 0;
+
+ spin_lock(&vm->status_lock);
+ seq_puts(m, "\tIdle BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_idle_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tEvicted BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_evicted_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tRelocated BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_relocated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tMoved BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_moved_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tInvalidated BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_invalidated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tDone BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ spin_unlock(&vm->status_lock);
+ total_done_objs = id;
+
+ seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
+ total_idle_objs);
+ seq_printf(m, "\tTotal evicted size: %12lld\tobjs:\t%d\n", total_evicted,
+ total_evicted_objs);
+ seq_printf(m, "\tTotal relocated size: %12lld\tobjs:\t%d\n", total_relocated,
+ total_relocated_objs);
+ seq_printf(m, "\tTotal moved size: %12lld\tobjs:\t%d\n", total_moved,
+ total_moved_objs);
+ seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
+ total_invalidated_objs);
+ seq_printf(m, "\tTotal done size: %12lld\tobjs:\t%d\n", total_done,
+ total_done_objs);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
new file mode 100644
index 000000000..204ab1318
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_VM_H__
+#define __AMDGPU_VM_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+
+struct drm_exec;
+
+struct amdgpu_bo_va;
+struct amdgpu_job;
+struct amdgpu_bo_list_entry;
+struct amdgpu_bo_vm;
+struct amdgpu_mem_stats;
+
+/*
+ * GPUVM handling
+ */
+
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
+
+/* number of entries in page table */
+#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
+
+#define AMDGPU_PTE_VALID (1ULL << 0)
+#define AMDGPU_PTE_SYSTEM (1ULL << 1)
+#define AMDGPU_PTE_SNOOPED (1ULL << 2)
+
+/* RV+ */
+#define AMDGPU_PTE_TMZ (1ULL << 3)
+
+/* VI only */
+#define AMDGPU_PTE_EXECUTABLE (1ULL << 4)
+
+#define AMDGPU_PTE_READABLE (1ULL << 5)
+#define AMDGPU_PTE_WRITEABLE (1ULL << 6)
+
+#define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7)
+
+/* TILED for VEGA10, reserved for older ASICs */
+#define AMDGPU_PTE_PRT (1ULL << 51)
+
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE (1ULL << 54)
+
+#define AMDGPU_PTE_LOG (1ULL << 55)
+
+/* PTE is handled as PDE for VEGA10 (Translate Further) */
+#define AMDGPU_PTE_TF (1ULL << 56)
+
+/* MALL noalloc for sienna_cichlid, reserved for older ASICs */
+#define AMDGPU_PTE_NOALLOC (1ULL << 58)
+
+/* PDE Block Fragment Size for VEGA10 */
+#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
+
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \
+ AMDGPU_PTE_TF)
+
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+ AMDGPU_PTE_PRT)
+/* For GFX9 */
+#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
+
+#define AMDGPU_MTYPE_NC 0
+#define AMDGPU_MTYPE_CC 2
+
+#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
+ | AMDGPU_PTE_SNOOPED \
+ | AMDGPU_PTE_EXECUTABLE \
+ | AMDGPU_PTE_READABLE \
+ | AMDGPU_PTE_WRITEABLE \
+ | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
+
+/* gfx10 */
+#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
+
+/* How to program VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER 0
+#define AMDGPU_VM_FAULT_STOP_FIRST 1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+
+/* Reserve 4MB VRAM for page tables */
+#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
+
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS 13
+#define AMDGPU_GFXHUB(x) (x)
+#define AMDGPU_MMHUB0(x) (8 + x)
+#define AMDGPU_MMHUB1(x) (8 + 4 + x)
+
+/* Reserve 2MB at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
+
+/* See vm_update_mode */
+#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
+#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
+
+/* VMPT level enumerate, and the hiberachy is:
+ * PDB2->PDB1->PDB0->PTB
+ */
+enum amdgpu_vm_level {
+ AMDGPU_VM_PDB2,
+ AMDGPU_VM_PDB1,
+ AMDGPU_VM_PDB0,
+ AMDGPU_VM_PTB
+};
+
+/* base structure for tracking BO usage in a VM */
+struct amdgpu_vm_bo_base {
+ /* constant after initialization */
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+
+ /* protected by bo being reserved */
+ struct amdgpu_vm_bo_base *next;
+
+ /* protected by spinlock */
+ struct list_head vm_status;
+
+ /* protected by the BO being reserved */
+ bool moved;
+};
+
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+ /* number of dw to reserve per operation */
+ unsigned copy_pte_num_dw;
+
+ /* copy pte entries from GART */
+ void (*copy_pte)(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count);
+
+ /* write pte one entry at a time with addr mapping */
+ void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr);
+ /* for linear pte/pde updates without addr mapping */
+ void (*set_pte_pde)(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags);
+};
+
+struct amdgpu_task_info {
+ char process_name[TASK_COMM_LEN];
+ char task_name[TASK_COMM_LEN];
+ pid_t pid;
+ pid_t tgid;
+};
+
+/**
+ * struct amdgpu_vm_update_params
+ *
+ * Encapsulate some VM table update parameters to reduce
+ * the number of function parameters
+ *
+ */
+struct amdgpu_vm_update_params {
+
+ /**
+ * @adev: amdgpu device we do this update for
+ */
+ struct amdgpu_device *adev;
+
+ /**
+ * @vm: optional amdgpu_vm we do this update for
+ */
+ struct amdgpu_vm *vm;
+
+ /**
+ * @immediate: if changes should be made immediately
+ */
+ bool immediate;
+
+ /**
+ * @unlocked: true if the root BO is not locked
+ */
+ bool unlocked;
+
+ /**
+ * @pages_addr:
+ *
+ * DMA addresses to use for mapping
+ */
+ dma_addr_t *pages_addr;
+
+ /**
+ * @job: job to used for hw submission
+ */
+ struct amdgpu_job *job;
+
+ /**
+ * @num_dw_left: number of dw left for the IB
+ */
+ unsigned int num_dw_left;
+
+ /**
+ * @table_freed: return true if page table is freed when updating
+ */
+ bool table_freed;
+};
+
+struct amdgpu_vm_update_funcs {
+ int (*map_table)(struct amdgpu_bo_vm *bo);
+ int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode);
+ int (*update)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr, uint64_t flags);
+ int (*commit)(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence);
+};
+
+struct amdgpu_vm {
+ /* tree of virtual addresses mapped */
+ struct rb_root_cached va;
+
+ /* Lock to prevent eviction while we are updating page tables
+ * use vm_eviction_lock/unlock(vm)
+ */
+ struct mutex eviction_lock;
+ bool evicting;
+ unsigned int saved_flags;
+
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
+ /* BOs who needs a validation */
+ struct list_head evicted;
+
+ /* PT BOs which relocated and their parent need an update */
+ struct list_head relocated;
+
+ /* per VM BOs moved, but not yet updated in the PT */
+ struct list_head moved;
+
+ /* All BOs of this VM not currently in the state machine */
+ struct list_head idle;
+
+ /* regular invalidated BOs, but not yet updated in the PT */
+ struct list_head invalidated;
+
+ /* BO mappings freed, but not yet updated in the PT */
+ struct list_head freed;
+
+ /* BOs which are invalidated, has been updated in the PTs */
+ struct list_head done;
+
+ /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
+ struct list_head pt_freed;
+ struct work_struct pt_free_work;
+
+ /* contains the page directory */
+ struct amdgpu_vm_bo_base root;
+ struct dma_fence *last_update;
+
+ /* Scheduler entities for page table updates */
+ struct drm_sched_entity immediate;
+ struct drm_sched_entity delayed;
+
+ /* Last finished delayed update */
+ atomic64_t tlb_seq;
+ struct dma_fence *last_tlb_flush;
+
+ /* How many times we had to re-generate the page tables */
+ uint64_t generation;
+
+ /* Last unlocked submission to the scheduler entities */
+ struct dma_fence *last_unlocked;
+
+ unsigned int pasid;
+ bool reserved_vmid[AMDGPU_MAX_VMHUBS];
+
+ /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
+ bool use_cpu_for_update;
+
+ /* Functions to use for VM table updates */
+ const struct amdgpu_vm_update_funcs *update_funcs;
+
+ /* Flag to indicate ATS support from PTE for GFX9 */
+ bool pte_support_ats;
+
+ /* Up to 128 pending retry page faults */
+ DECLARE_KFIFO(faults, u64, 128);
+
+ /* Points to the KFD process VM info */
+ struct amdkfd_process_info *process_info;
+
+ /* List node in amdkfd_process_info.vm_list_head */
+ struct list_head vm_list_node;
+
+ /* Valid while the PD is reserved or fenced */
+ uint64_t pd_phys_addr;
+
+ /* Some basic info about the task */
+ struct amdgpu_task_info task_info;
+
+ /* Store positions of group of BOs */
+ struct ttm_lru_bulk_move lru_bulk_move;
+ /* Flag to indicate if VM is used for compute */
+ bool is_compute_context;
+
+ /* Memory partition number, -1 means any partition */
+ int8_t mem_id;
+};
+
+struct amdgpu_vm_manager {
+ /* Handling of VMIDs */
+ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
+ unsigned int first_kfd_vmid;
+ bool concurrent_flush;
+
+ /* Handling of VM fences */
+ u64 fence_context;
+ unsigned seqno[AMDGPU_MAX_RINGS];
+
+ uint64_t max_pfn;
+ uint32_t num_level;
+ uint32_t block_size;
+ uint32_t fragment_size;
+ enum amdgpu_vm_level root_level;
+ /* vram base address for page table entry */
+ u64 vram_base_offset;
+ /* vm pte handling */
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
+ struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_scheds;
+ struct amdgpu_ring *page_fault;
+
+ /* partial resident texture handling */
+ spinlock_t prt_lock;
+ atomic_t num_prt_users;
+
+ /* controls how VM page tables are updated for Graphics and Compute.
+ * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
+ * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
+ */
+ int vm_update_mode;
+
+ /* PASID to VM mapping, will be used in interrupt context to
+ * look up VM of a page fault
+ */
+ struct xarray pasids;
+};
+
+struct amdgpu_bo_va_mapping;
+
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs;
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
+
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+
+int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ u32 pasid);
+
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool immediate);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo, bool evicted);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint64_t flags);
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint64_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr);
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size);
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+ uint64_t addr);
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va);
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
+ uint32_t fragment_size_default, unsigned max_level,
+ unsigned max_bits);
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
+
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
+ struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
+ u32 vmid, u32 node_id, uint64_t addr,
+ bool write_fault);
+
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats *stats);
+
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate);
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id);
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry);
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
+
+#if defined(CONFIG_DEBUG_FS)
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
+#endif
+
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs needs
+ * to be invalidated whenever the sequence number change.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+ unsigned long flags;
+ spinlock_t *lock;
+
+ /*
+ * Workaround to stop racing between the fence signaling and handling
+ * the cb. The lock is static after initially setting it up, just make
+ * sure that the dma_fence structure isn't freed up.
+ */
+ rcu_read_lock();
+ lock = vm->last_tlb_flush->lock;
+ rcu_read_unlock();
+
+ spin_lock_irqsave(lock, flags);
+ spin_unlock_irqrestore(lock, flags);
+
+ return atomic64_read(&vm->tlb_seq);
+}
+
+/*
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_noreclaim_save();
+ return true;
+ }
+ return false;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_noreclaim_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
new file mode 100644
index 000000000..6e3162145
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_vm.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+
+/**
+ * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
+{
+ table->bo.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ return amdgpu_bo_kmap(&table->bo, NULL);
+}
+
+/**
+ * amdgpu_vm_cpu_prepare - prepare page table update with the CPU
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @resv: reservation object with embedded fence
+ * @sync_mode: synchronization mode
+ *
+ * Returns:
+ * Negativ errno, 0 for success.
+ */
+static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
+ struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode)
+{
+ if (!resv)
+ return 0;
+
+ return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
+}
+
+/**
+ * amdgpu_vm_cpu_update - helper to update page tables via CPU
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @vmbo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Write count number of PT/PD entries directly.
+ */
+static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
+ uint64_t addr, unsigned count, uint32_t incr,
+ uint64_t flags)
+{
+ unsigned int i;
+ uint64_t value;
+ long r;
+
+ r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
+ pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
+
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
+
+ for (i = 0; i < count; i++) {
+ value = p->pages_addr ?
+ amdgpu_vm_map_gart(p->pages_addr, addr) :
+ addr;
+ amdgpu_gmc_set_pte_pde(p->adev, (void *)(uintptr_t)pe,
+ i, value, flags);
+ addr += incr;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_vm_cpu_commit - commit page table update to the HW
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @fence: unused
+ *
+ * Make sure that the hardware sees the page table updates.
+ */
+static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence)
+{
+ /* Flush HDP */
+ mb();
+ amdgpu_device_flush_hdp(p->adev, NULL);
+ return 0;
+}
+
+const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = {
+ .map_table = amdgpu_vm_cpu_map_table,
+ .prepare = amdgpu_vm_cpu_prepare,
+ .update = amdgpu_vm_cpu_update,
+ .commit = amdgpu_vm_cpu_commit
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
new file mode 100644
index 000000000..0d51222f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -0,0 +1,1108 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_vm.h"
+
+/*
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ */
+struct amdgpu_vm_pt_cursor {
+ uint64_t pfn;
+ struct amdgpu_vm_bo_base *parent;
+ struct amdgpu_vm_bo_base *entry;
+ unsigned int level;
+};
+
+/**
+ * amdgpu_vm_pt_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ switch (level) {
+ case AMDGPU_VM_PDB2:
+ case AMDGPU_VM_PDB1:
+ case AMDGPU_VM_PDB0:
+ return 9 * (AMDGPU_VM_PDB0 - level) +
+ adev->vm_manager.block_size;
+ case AMDGPU_VM_PTB:
+ return 0;
+ default:
+ return ~0;
+ }
+}
+
+/**
+ * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of entries in a page directory or page table.
+ */
+static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ if (level == adev->vm_manager.root_level)
+ /* For the root directory */
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
+ else if (level != AMDGPU_VM_PTB)
+ /* Everything in between */
+ return 512;
+
+ /* For the page tables on the leaves */
+ return AMDGPU_VM_PTE_COUNT(adev);
+}
+
+/**
+ * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * The number of entries in the root page directory which needs the ATS setting.
+ */
+static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
+}
+
+/**
+ * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ if (level <= adev->vm_manager.root_level)
+ return 0xffffffff;
+ else if (level != AMDGPU_VM_PTB)
+ return 0x1ff;
+ else
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
+/**
+ * amdgpu_vm_pt_size - returns the size of the page table in bytes
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
+ */
+static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
+}
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_bo_base *
+amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
+{
+ struct amdgpu_bo *parent = pt->bo->parent;
+
+ if (!parent)
+ return NULL;
+
+ return parent->vm_bo;
+}
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ cursor->pfn = start;
+ cursor->parent = NULL;
+ cursor->entry = &vm->root;
+ cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned int mask, shift, idx;
+
+ if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
+ !cursor->entry->bo)
+ return false;
+
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
+
+ ++cursor->level;
+ idx = (cursor->pfn >> shift) & mask;
+ cursor->parent = cursor->entry;
+ cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+
+ unsigned int shift, num_entries;
+ struct amdgpu_bo_vm *parent;
+
+ /* Root doesn't have a sibling */
+ if (!cursor->parent)
+ return false;
+
+ /* Go to our parents and see if we got a sibling */
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
+ num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
+ parent = to_amdgpu_bo_vm(cursor->parent->bo);
+
+ if (cursor->entry == &parent->entries[num_entries - 1])
+ return false;
+
+ cursor->pfn += 1ULL << shift;
+ cursor->pfn &= ~((1ULL << shift) - 1);
+ ++cursor->entry;
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->parent)
+ return false;
+
+ --cursor->level;
+ cursor->entry = cursor->parent;
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hieratchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ /* First try a newborn child */
+ if (amdgpu_vm_pt_descendant(adev, cursor))
+ return;
+
+ /* If that didn't worked try to find a sibling */
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+ /* No sibling, go to our parents and grandparents */
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
+ cursor->pfn = ~0ll;
+ return;
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
+ * @cursor: state to initialize
+ *
+ * Starts a deep first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (start)
+ *cursor = *start;
+ else
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
+
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+}
+
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * True when the search should continue, false otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_bo_base *entry)
+{
+ return entry && (!start || entry != start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a deep first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->entry)
+ return;
+
+ if (!cursor->parent)
+ cursor->entry = NULL;
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+ else
+ amdgpu_vm_pt_ancestor(cursor);
+}
+
+/*
+ * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+
+/**
+ * amdgpu_vm_pt_clear - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
+ * @vmbo: BO to clear
+ * @immediate: use an immediate update
+ *
+ * Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate)
+{
+ unsigned int level = adev->vm_manager.root_level;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_bo *ancestor = &vmbo->bo;
+ unsigned int entries, ats_entries;
+ struct amdgpu_bo *bo = &vmbo->bo;
+ uint64_t addr;
+ int r, idx;
+
+ /* Figure out our place in the hierarchy */
+ if (ancestor->parent) {
+ ++level;
+ while (ancestor->parent->parent) {
+ ++level;
+ ancestor = ancestor->parent;
+ }
+ }
+
+ entries = amdgpu_bo_size(bo) / 8;
+ if (!vm->pte_support_ats) {
+ ats_entries = 0;
+
+ } else if (!bo->parent) {
+ ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
+ ats_entries = min(ats_entries, entries);
+ entries -= ats_entries;
+
+ } else {
+ struct amdgpu_vm_bo_base *pt;
+
+ pt = ancestor->vm_bo;
+ ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
+ if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
+ ats_entries) {
+ ats_entries = 0;
+ } else {
+ ats_entries = entries;
+ entries = 0;
+ }
+ }
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return r;
+
+ if (vmbo->shadow) {
+ struct amdgpu_bo *shadow = vmbo->shadow;
+
+ r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
+ if (r)
+ return r;
+ }
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ r = vm->update_funcs->map_table(vmbo);
+ if (r)
+ goto exit;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ if (r)
+ goto exit;
+
+ addr = 0;
+ if (ats_entries) {
+ uint64_t value = 0, flags;
+
+ flags = AMDGPU_PTE_DEFAULT_ATC;
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0,
+ ats_entries, value, flags);
+ if (r)
+ goto exit;
+
+ addr += ats_entries * 8;
+ }
+
+ if (entries) {
+ uint64_t value = 0, flags = 0;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(adev, level,
+ &value, &flags);
+ } else {
+ /* Workaround for fault priority problem on GMC9 */
+ flags = AMDGPU_PTE_EXECUTABLE;
+ }
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+ value, flags);
+ if (r)
+ goto exit;
+ }
+
+ r = vm->update_funcs->commit(&params, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_create - create bo for PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requesting vm
+ * @level: the page table level
+ * @immediate: use a immediate update
+ * @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
+ */
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id)
+{
+ struct amdgpu_bo_param bp;
+ struct amdgpu_bo *bo;
+ struct dma_resv *resv;
+ unsigned int num_entries;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+
+ if (!adev->gmc.is_app_apu)
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ else
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
+ bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (level < AMDGPU_VM_PTB)
+ num_entries = amdgpu_vm_pt_num_entries(adev, level);
+ else
+ num_entries = 0;
+
+ bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
+ if (vm->use_cpu_for_update)
+ bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ bp.type = ttm_bo_type_kernel;
+ bp.no_wait_gpu = immediate;
+ bp.xcp_id_plus1 = xcp_id + 1;
+
+ if (vm->root.bo)
+ bp.resv = vm->root.bo->tbo.base.resv;
+
+ r = amdgpu_bo_create_vm(adev, &bp, vmbo);
+ if (r)
+ return r;
+
+ bo = &(*vmbo)->bo;
+ if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
+ (*vmbo)->shadow = NULL;
+ return 0;
+ }
+
+ if (!bp.resv)
+ WARN_ON(dma_resv_lock(bo->tbo.base.resv,
+ NULL));
+ resv = bp.resv;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = bo->tbo.base.resv;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id + 1;
+
+ r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
+
+ if (!resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ return r;
+ }
+
+ amdgpu_bo_add_to_shadow_list(*vmbo);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_alloc - Allocate a specific page table
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @cursor: Which page table to allocate
+ * @immediate: use an immediate update
+ *
+ * Make sure a specific page table or directory is allocated.
+ *
+ * Returns:
+ * 1 if page table needed to be allocated, 0 if page table was already
+ * allocated, negative errno if an error occurred.
+ */
+static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool immediate)
+{
+ struct amdgpu_vm_bo_base *entry = cursor->entry;
+ struct amdgpu_bo *pt_bo;
+ struct amdgpu_bo_vm *pt;
+ int r;
+
+ if (entry->bo)
+ return 0;
+
+ amdgpu_vm_eviction_unlock(vm);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+ vm->root.bo->xcp_id);
+ amdgpu_vm_eviction_lock(vm);
+ if (r)
+ return r;
+
+ /* Keep a reference to the root directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt_bo = &pt->bo;
+ pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
+ amdgpu_vm_bo_base_init(entry, vm, pt_bo);
+ r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
+ if (r)
+ goto error_free_pt;
+
+ return 0;
+
+error_free_pt:
+ amdgpu_bo_unref(&pt->shadow);
+ amdgpu_bo_unref(&pt_bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_free - free one PD/PT
+ *
+ * @entry: PDE to free
+ */
+static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_bo *shadow;
+
+ if (!entry->bo)
+ return;
+
+ entry->bo->vm_bo = NULL;
+ shadow = amdgpu_bo_shadowed(entry->bo);
+ if (shadow) {
+ ttm_bo_set_bulk_move(&shadow->tbo, NULL);
+ amdgpu_bo_unref(&shadow);
+ }
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+
+ spin_lock(&entry->vm->status_lock);
+ list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
+ amdgpu_bo_unref(&entry->bo);
+}
+
+void amdgpu_vm_pt_free_work(struct work_struct *work)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ struct amdgpu_vm *vm;
+ LIST_HEAD(pt_freed);
+
+ vm = container_of(work, struct amdgpu_vm, pt_free_work);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->pt_freed, &pt_freed);
+ spin_unlock(&vm->status_lock);
+
+ /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
+ amdgpu_bo_reserve(vm->root.bo, true);
+
+ list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
+ amdgpu_vm_pt_free(entry);
+
+ amdgpu_bo_unreserve(vm->root.bo);
+}
+
+/**
+ * amdgpu_vm_pt_free_dfs - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ * @start: optional cursor where to start freeing PDs/PTs
+ * @unlocked: vm resv unlock status
+ *
+ * Free the page directory or page table level and all sub levels.
+ */
+static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start,
+ bool unlocked)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ if (unlocked) {
+ spin_lock(&vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ list_move(&entry->vm_status, &vm->pt_freed);
+
+ if (start)
+ list_move(&start->entry->vm_status, &vm->pt_freed);
+ spin_unlock(&vm->status_lock);
+ schedule_work(&vm->pt_free_work);
+ return;
+ }
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ amdgpu_vm_pt_free(entry);
+
+ if (start)
+ amdgpu_vm_pt_free(start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_free_root - free root PD
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the root page directory and everything below it.
+ */
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
+}
+
+/**
+ * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: the VM to check
+ *
+ * Check all entries of the root PD, if any subsequent PDs are allocated,
+ * it means there are page table creating and filling, and is no a clean
+ * VM
+ *
+ * Returns:
+ * 0 if this VM is clean
+ */
+bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ enum amdgpu_vm_level root = adev->vm_manager.root_level;
+ unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
+ unsigned int i = 0;
+
+ for (i = 0; i < entries; i++) {
+ if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_vm_pde_update - update a single level in the hierarchy
+ *
+ * @params: parameters for the update
+ * @entry: entry to update
+ *
+ * Makes sure the requested entry in parent is up to date.
+ */
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
+ struct amdgpu_bo *bo = parent->bo, *pbo;
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t pde, pt, flags;
+ unsigned int level;
+
+ for (level = 0, pbo = bo->parent; pbo; ++level)
+ pbo = pbo->parent;
+
+ level += params->adev->vm_manager.root_level;
+ amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
+ pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
+ return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
+ 1, 0, flags);
+}
+
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+ uint64_t *flags)
+{
+ /*
+ * Update no-retry flags with the corresponding TF
+ * no-retry combination.
+ */
+ if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+ *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+ *flags |= adev->gmc.noretry_flags;
+ }
+}
+
+/*
+ * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
+ *
+ * Make sure to set the right flags for the PTEs at the desired level.
+ */
+static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
+ struct amdgpu_bo_vm *pt,
+ unsigned int level,
+ uint64_t pe, uint64_t addr,
+ unsigned int count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+
+ if (level != AMDGPU_VM_PTB) {
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
+
+ } else if (adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT)) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
+ }
+
+ /*
+ * Update no-retry flags to use the no-retry flag combination
+ * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
+ * does not work when TF is enabled. So, replace them with
+ * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
+ * all cases.
+ */
+ if (level == AMDGPU_VM_PTB)
+ amdgpu_vm_pte_update_noretry_flags(adev, &flags);
+
+ /* APUs mapping system memory may need different MTYPEs on different
+ * NUMA nodes. Only do this for contiguous ranges that can be assumed
+ * to be on the same NUMA node.
+ */
+ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+ adev->gmc.gmc_funcs->override_vm_pte_flags &&
+ num_possible_nodes() > 1) {
+ if (!params->pages_addr)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
+ addr, &flags);
+ else
+ dev_dbg(adev->dev,
+ "override_vm_pte_flags skipped: non-contiguous\n");
+ }
+
+ params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
+ flags);
+}
+
+/**
+ * amdgpu_vm_pte_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address.
+ */
+static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly feed with small/huge/giant pages from the walker.
+ */
+ unsigned int max_frag;
+
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
+
+ /* system pages are non continuously */
+ if (params->pages_addr) {
+ *frag = 0;
+ *frag_end = end;
+ return;
+ }
+
+ /* This intentionally wraps around if no bit is set */
+ *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ *frag_end = start + (1 << *frag);
+ }
+}
+
+/**
+ * amdgpu_vm_ptes_update - make sure that page tables are valid
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to, the next dst inside the function
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
+ */
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
+ int r;
+
+ /* figure out the initial fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
+ &frag_end);
+
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ unsigned int shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
+ struct amdgpu_bo *pt;
+
+ if (!params->unlocked) {
+ /* make sure that the page tables covering the
+ * address range are actually allocated
+ */
+ r = amdgpu_vm_pt_alloc(params->adev, params->vm,
+ &cursor, params->immediate);
+ if (r)
+ return r;
+ }
+
+ shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+ continue;
+ }
+
+ pt = cursor.entry->bo;
+ if (!pt) {
+ /* We need all PDs and PTs for mapping something, */
+ if (flags & AMDGPU_PTE_VALID)
+ return -ENOENT;
+
+ /* but unmapping something can happen at a higher
+ * level.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+
+ pt = cursor.entry->bo;
+ shift = parent_shift;
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
+ 1ULL << shift));
+ }
+
+ /* Looks good so far, calculate parameters for the update */
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+ entry_end = ((uint64_t)mask + 1) << shift;
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned int nptes = (upd_end - frag_start) >> shift;
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+
+ /* This can happen when we set higher level PDs to
+ * silent to stop fault floods.
+ */
+ nptes = max(nptes, 1u);
+
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+ min(nptes, 32u), dst, incr,
+ upd_flags,
+ vm->task_info.tgid,
+ vm->immediate.fence_context);
+ amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
+ cursor.level, pe_start, dst,
+ nptes, incr, upd_flags);
+
+ pe_start += nptes * 8;
+ dst += nptes * incr;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
+
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
+ while (cursor.pfn < frag_start) {
+ /* Make sure previous mapping is freed */
+ if (cursor.entry->bo) {
+ params->table_freed = true;
+ amdgpu_vm_pt_free_dfs(adev, params->vm,
+ &cursor,
+ params->unlocked);
+ }
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * make root page directory and everything below it cpu accessible.
+ */
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+
+ struct amdgpu_bo_vm *bo;
+ int r;
+
+ if (entry->bo) {
+ bo = to_amdgpu_bo_vm(entry->bo);
+ r = vm->update_funcs->map_table(bo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
new file mode 100644
index 000000000..349416e17
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_VM_SDMA_MIN_NUM_DW 256u
+#define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u)
+
+/**
+ * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
+{
+ int r;
+
+ r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
+ if (r)
+ return r;
+
+ if (table->shadow)
+ r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
+
+ return r;
+}
+
+/* Allocate a new job for @count PTE updates */
+static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
+ unsigned int count)
+{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
+ struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
+ : &p->vm->delayed;
+ unsigned int ndw;
+ int r;
+
+ /* estimate how many dw we need */
+ ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+ if (p->pages_addr)
+ ndw += count * 2;
+ ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
+
+ r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
+ ndw * 4, pool, &p->job);
+ if (r)
+ return r;
+
+ p->num_dw_left = ndw;
+ return 0;
+}
+
+/**
+ * amdgpu_vm_sdma_prepare - prepare SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @resv: reservation object with embedded fence
+ * @sync_mode: synchronization mode
+ *
+ * Returns:
+ * Negativ errno, 0 for success.
+ */
+static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
+ struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode)
+{
+ struct amdgpu_sync sync;
+ int r;
+
+ r = amdgpu_vm_sdma_alloc_job(p, 0);
+ if (r)
+ return r;
+
+ if (!resv)
+ return 0;
+
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
+ if (!r)
+ r = amdgpu_sync_push_to_job(&sync, p->job);
+ amdgpu_sync_free(&sync);
+ return r;
+}
+
+/**
+ * amdgpu_vm_sdma_commit - commit SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @fence: resulting fence
+ *
+ * Returns:
+ * Negativ errno, 0 for success.
+ */
+static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+ struct amdgpu_ring *ring;
+ struct dma_fence *f;
+
+ ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
+ sched);
+
+ WARN_ON(ib->length_dw == 0);
+ amdgpu_ring_pad_ib(ring, ib);
+ WARN_ON(ib->length_dw > p->num_dw_left);
+ f = amdgpu_job_submit(p->job);
+
+ if (p->unlocked) {
+ struct dma_fence *tmp = dma_fence_get(f);
+
+ swap(p->vm->last_unlocked, tmp);
+ dma_fence_put(tmp);
+ } else {
+ dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
+ swap(*fence, f);
+ }
+ dma_fence_put(f);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: addr of the page entry
+ * @count: number of page entries to copy
+ *
+ * Traces the parameters and calls the DMA function to copy the PTEs.
+ */
+static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo *bo, uint64_t pe,
+ unsigned count)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+ uint64_t src = ib->gpu_addr;
+
+ src += p->num_dw_left * 4;
+
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
+ trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
+
+ amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
+}
+
+/**
+ * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Traces the parameters and calls the right asic functions
+ * to setup the page table using the DMA.
+ */
+static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo *bo, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
+ if (count < 3) {
+ amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
+ count, incr);
+ } else {
+ amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
+ count, incr, flags);
+ }
+}
+
+/**
+ * amdgpu_vm_sdma_update - execute VM update
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @vmbo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Reserve space in the IB, setup mapping buffer on demand and write commands to
+ * the IB.
+ */
+static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
+ uint64_t addr, unsigned count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_bo *bo = &vmbo->bo;
+ struct dma_resv_iter cursor;
+ unsigned int i, ndw, nptes;
+ struct dma_fence *fence;
+ uint64_t *pte;
+ int r;
+
+ /* Wait for PD/PT moves to be completed */
+ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&p->job->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ dma_resv_iter_end(&cursor);
+ return r;
+ }
+ }
+ dma_resv_iter_end(&cursor);
+
+ do {
+ ndw = p->num_dw_left;
+ ndw -= p->job->ibs->length_dw;
+
+ if (ndw < 32) {
+ r = amdgpu_vm_sdma_commit(p, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_sdma_alloc_job(p, count);
+ if (r)
+ return r;
+ }
+
+ if (!p->pages_addr) {
+ /* set page commands needed */
+ if (vmbo->shadow)
+ amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
+ count, incr, flags);
+ amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
+ incr, flags);
+ return 0;
+ }
+
+ /* copy commands needed */
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
+ (vmbo->shadow ? 2 : 1);
+
+ /* for padding */
+ ndw -= 7;
+
+ nptes = min(count, ndw / 2);
+
+ /* Put the PTEs at the end of the IB. */
+ p->num_dw_left -= nptes * 2;
+ pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
+ for (i = 0; i < nptes; ++i, addr += incr) {
+ pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
+ pte[i] |= flags;
+ }
+
+ if (vmbo->shadow)
+ amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
+ amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
+
+ pe += nptes * 8;
+ count -= nptes;
+ } while (count);
+
+ return 0;
+}
+
+const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
+ .map_table = amdgpu_vm_sdma_map_table,
+ .prepare = amdgpu_vm_sdma_prepare,
+ .update = amdgpu_vm_sdma_update,
+ .commit = amdgpu_vm_sdma_commit
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
new file mode 100644
index 000000000..c7085a747
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -0,0 +1,948 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#include <linux/dma-mapping.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_res_cursor.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
+
+struct amdgpu_vram_reservation {
+ u64 start;
+ u64 size;
+ struct list_head allocated;
+ struct list_head blocks;
+};
+
+static inline struct amdgpu_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct amdgpu_vram_mgr, manager);
+}
+
+static inline struct amdgpu_device *
+to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
+{
+ return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
+}
+
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+
+
+/**
+ * DOC: mem_info_vram_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total VRAM
+ * available on the device
+ * The file mem_info_vram_total is used for this and returns the total
+ * amount of VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.real_vram_size);
+}
+
+/**
+ * DOC: mem_info_vis_vram_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total
+ * visible VRAM available on the device
+ * The file mem_info_vis_vram_total is used for this and returns the total
+ * amount of visible VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.visible_vram_size);
+}
+
+/**
+ * DOC: mem_info_vram_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total VRAM
+ * available on the device
+ * The file mem_info_vram_used is used for this and returns the total
+ * amount of currently used VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+/**
+ * DOC: mem_info_vis_vram_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total of
+ * used visible VRAM
+ * The file mem_info_vis_vram_used is used for this and returns the total
+ * amount of currently used visible VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr));
+}
+
+/**
+ * DOC: mem_info_vram_vendor
+ *
+ * The amdgpu driver provides a sysfs API for reporting the vendor of the
+ * installed VRAM
+ * The file mem_info_vram_vendor is used for this and returns the name of the
+ * vendor.
+ */
+static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ switch (adev->gmc.vram_vendor) {
+ case SAMSUNG:
+ return sysfs_emit(buf, "samsung\n");
+ case INFINEON:
+ return sysfs_emit(buf, "infineon\n");
+ case ELPIDA:
+ return sysfs_emit(buf, "elpida\n");
+ case ETRON:
+ return sysfs_emit(buf, "etron\n");
+ case NANYA:
+ return sysfs_emit(buf, "nanya\n");
+ case HYNIX:
+ return sysfs_emit(buf, "hynix\n");
+ case MOSEL:
+ return sysfs_emit(buf, "mosel\n");
+ case WINBOND:
+ return sysfs_emit(buf, "winbond\n");
+ case ESMT:
+ return sysfs_emit(buf, "esmt\n");
+ case MICRON:
+ return sysfs_emit(buf, "micron\n");
+ default:
+ return sysfs_emit(buf, "unknown\n");
+ }
+}
+
+static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
+ amdgpu_mem_info_vram_total_show, NULL);
+static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
+ amdgpu_mem_info_vis_vram_total_show,NULL);
+static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
+ amdgpu_mem_info_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
+ amdgpu_mem_info_vis_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
+ amdgpu_mem_info_vram_vendor, NULL);
+
+static struct attribute *amdgpu_vram_mgr_attributes[] = {
+ &dev_attr_mem_info_vram_total.attr,
+ &dev_attr_mem_info_vis_vram_total.attr,
+ &dev_attr_mem_info_vram_used.attr,
+ &dev_attr_mem_info_vis_vram_used.attr,
+ &dev_attr_mem_info_vram_vendor.attr,
+ NULL
+};
+
+const struct attribute_group amdgpu_vram_mgr_attr_group = {
+ .attrs = amdgpu_vram_mgr_attributes
+};
+
+/**
+ * amdgpu_vram_mgr_vis_size - Calculate visible block size
+ *
+ * @adev: amdgpu_device pointer
+ * @block: DRM BUDDY block structure
+ *
+ * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
+ */
+static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
+ struct drm_buddy_block *block)
+{
+ u64 start = amdgpu_vram_mgr_block_start(block);
+ u64 end = start + amdgpu_vram_mgr_block_size(block);
+
+ if (start >= adev->gmc.visible_vram_size)
+ return 0;
+
+ return (end > adev->gmc.visible_vram_size ?
+ adev->gmc.visible_vram_size : end) - start;
+}
+
+/**
+ * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_resource *res = bo->tbo.resource;
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+ u64 usage = 0;
+
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ return amdgpu_bo_size(bo);
+
+ if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ return 0;
+
+ list_for_each_entry(block, &vres->blocks, link)
+ usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ return usage;
+}
+
+/* Commit the reservation of VRAM pages */
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv, *temp;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage;
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
+ if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ rsv->size, mm->chunk_size, &rsv->allocated,
+ DRM_BUDDY_RANGE_ALLOCATION))
+ continue;
+
+ block = amdgpu_vram_mgr_first_block(&rsv->allocated);
+ if (!block)
+ continue;
+
+ dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+ rsv->start, rsv->size);
+
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+ atomic64_add(vis_usage, &mgr->vis_usage);
+ spin_lock(&man->bdev->lru_lock);
+ man->usage += rsv->size;
+ spin_unlock(&man->bdev->lru_lock);
+ list_move(&rsv->blocks, &mgr->reserved_pages);
+ }
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
+ uint64_t start, uint64_t size)
+{
+ struct amdgpu_vram_reservation *rsv;
+
+ rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+ if (!rsv)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rsv->allocated);
+ INIT_LIST_HEAD(&rsv->blocks);
+
+ rsv->start = start;
+ rsv->size = size;
+
+ mutex_lock(&mgr->lock);
+ list_add_tail(&rsv->blocks, &mgr->reservations_pending);
+ amdgpu_vram_mgr_do_reserve(&mgr->manager);
+ mutex_unlock(&mgr->lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ * @start: start address of a page in VRAM
+ *
+ * Returns:
+ * -EBUSY: the page is still hold and in pending list
+ * 0: the page has been reserved
+ * -ENOENT: the input page is not a reservation
+ */
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
+ uint64_t start)
+{
+ struct amdgpu_vram_reservation *rsv;
+ int ret;
+
+ mutex_lock(&mgr->lock);
+
+ list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ret = -ENOENT;
+out:
+ mutex_unlock(&mgr->lock);
+ return ret;
+}
+
+static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
+}
+
+static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
+ return false;
+}
+
+static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
+ return true;
+}
+
+static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
+}
+
+static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
+ return -ENOSPC;
+}
+
+/**
+ * amdgpu_vram_mgr_new - allocate new ranges
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct amdgpu_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
+ int r;
+
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
+ if (!lpfn)
+ lpfn = man->size;
+
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
+ max_bytes = adev->gmc.mc_vram_size;
+ if (tbo->type != ttm_bo_type_kernel)
+ max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ pages_per_block = ~0ul;
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_block = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+ pages_per_block = max_t(uint32_t, pages_per_block,
+ tbo->page_alignment);
+ }
+
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &vres->base);
+
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ if (ttm_resource_manager_usage(man) > max_bytes) {
+ r = -ENOSPC;
+ goto error_fini;
+ }
+
+ INIT_LIST_HEAD(&vres->blocks);
+
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+ if (fpfn || lpfn != mgr->mm.size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+ remaining_size = (u64)vres->base.size;
+
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
+
+ BUG_ON(min_block_size < mm->chunk_size);
+
+ /* Limit maximum size to 2GiB due to SG table limitations */
+ size = min(remaining_size, 2ULL << 30);
+
+ if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+
+ cur_size = size;
+
+ if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
+ /*
+ * Except for actual range allocation, modify the size and
+ * min_block_size conforming to continuous flag enablement
+ */
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ size = roundup_pow_of_two(size);
+ min_block_size = size;
+ /*
+ * Modify the size value if size is not
+ * aligned with min_block_size
+ */
+ } else if (!IS_ALIGNED(size, min_block_size)) {
+ size = round_up(size, min_block_size);
+ }
+ }
+
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+ if (unlikely(r))
+ goto error_free_blocks;
+
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
+ }
+ mutex_unlock(&mgr->lock);
+
+ if (cur_size != size) {
+ struct drm_buddy_block *block;
+ struct list_head *trim_list;
+ u64 original_size;
+ LIST_HEAD(temp);
+
+ trim_list = &vres->blocks;
+ original_size = (u64)vres->base.size;
+
+ /*
+ * If size value is rounded up to min_block_size, trim the last
+ * block to the required size
+ */
+ if (!list_is_singular(&vres->blocks)) {
+ block = list_last_entry(&vres->blocks, typeof(*block), link);
+ list_move_tail(&block->link, &temp);
+ trim_list = &temp;
+ /*
+ * Compute the original_size value by subtracting the
+ * last block size with (aligned size - original size)
+ */
+ original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
+ }
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_block_trim(mm,
+ original_size,
+ trim_list);
+ mutex_unlock(&mgr->lock);
+
+ if (!list_empty(&temp))
+ list_splice_tail(trim_list, &vres->blocks);
+ }
+
+ vres->base.start = 0;
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
+
+ start = amdgpu_vram_mgr_block_start(block) +
+ amdgpu_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > PFN_UP(vres->base.size))
+ start -= PFN_UP(vres->base.size);
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
+
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ }
+
+ if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+ if (adev->gmc.xgmi.connected_to_cpu)
+ vres->base.bus.caching = ttm_cached;
+ else
+ vres->base.bus.caching = ttm_write_combined;
+
+ atomic64_add(vis_usage, &mgr->vis_usage);
+ *res = &vres->base;
+ return 0;
+
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
+error_fini:
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
+
+ return r;
+}
+
+/**
+ * amdgpu_vram_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated VRAM again.
+ */
+static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage = 0;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ amdgpu_vram_mgr_do_reserve(man);
+
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
+
+ atomic64_sub(vis_usage, &mgr->vis_usage);
+
+ ttm_resource_fini(man, res);
+ kfree(vres);
+}
+
+/**
+ * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
+ *
+ * @adev: amdgpu device pointer
+ * @res: TTM memory object
+ * @offset: byte offset from the base of VRAM BO
+ * @length: number of bytes to export in sg_table
+ * @dev: the other device
+ * @dir: dma direction
+ * @sgt: resulting sg table
+ *
+ * Allocate and fill a sg table from a VRAM allocation.
+ */
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_resource *res,
+ u64 offset, u64 length,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt)
+{
+ struct amdgpu_res_cursor cursor;
+ struct scatterlist *sg;
+ int num_entries = 0;
+ int i, r;
+
+ *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+ if (!*sgt)
+ return -ENOMEM;
+
+ /* Determine the number of DRM_BUDDY blocks to export */
+ amdgpu_res_first(res, offset, length, &cursor);
+ while (cursor.remaining) {
+ num_entries++;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+ if (r)
+ goto error_free;
+
+ /* Initialize scatterlist nodes of sg_table */
+ for_each_sgtable_sg((*sgt), sg, i)
+ sg->length = 0;
+
+ /*
+ * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the DRM_BUDDY block
+ * and the number of bytes from it. Access the following
+ * DRM_BUDDY block(s) if more buffer needs to exported
+ */
+ amdgpu_res_first(res, offset, length, &cursor);
+ for_each_sgtable_sg((*sgt), sg, i) {
+ phys_addr_t phys = cursor.start + adev->gmc.aper_base;
+ size_t size = cursor.size;
+ dma_addr_t addr;
+
+ addr = dma_map_resource(dev, phys, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ r = dma_mapping_error(dev, addr);
+ if (r)
+ goto error_unmap;
+
+ sg_set_page(sg, NULL, size, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = size;
+
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return 0;
+
+error_unmap:
+ for_each_sgtable_sg((*sgt), sg, i) {
+ if (!sg->length)
+ continue;
+
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ sg_free_table(*sgt);
+
+error_free:
+ kfree(*sgt);
+ return r;
+}
+
+/**
+ * amdgpu_vram_mgr_free_sgt - allocate and fill a sg table
+ *
+ * @dev: device pointer
+ * @dir: data direction of resource to unmap
+ * @sgt: sg table to free
+ *
+ * Free a previously allocate sg table.
+ */
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sgtable_sg(sgt, sg, i)
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+/**
+ * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ *
+ * Returns how many bytes are used in the visible part of VRAM
+ */
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
+{
+ return atomic64_read(&mgr->vis_usage);
+}
+
+/**
+ * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for intersection for eviction decision.
+ */
+static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (place->fpfn < lpfn &&
+ (!place->lpfn || place->lpfn > fpfn))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for placement compatibility.
+ */
+static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (fpfn < place->fpfn ||
+ (place->lpfn && lpfn > place->lpfn))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * amdgpu_vram_mgr_debug - dump VRAM table
+ *
+ * @man: TTM memory type manager
+ * @printer: DRM printer to use
+ *
+ * Dump the table content using printk.
+ */
+static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv;
+
+ drm_printf(printer, " vis usage:%llu\n",
+ amdgpu_vram_mgr_vis_usage(mgr));
+
+ mutex_lock(&mgr->lock);
+ drm_printf(printer, "default_page_size: %lluKiB\n",
+ mgr->default_page_size >> 10);
+
+ drm_buddy_print(mm, printer);
+
+ drm_printf(printer, "reserved:\n");
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+ drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+ rsv->start, rsv->start + rsv->size, rsv->size);
+ mutex_unlock(&mgr->lock);
+}
+
+static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
+ .alloc = amdgpu_dummy_vram_mgr_new,
+ .free = amdgpu_dummy_vram_mgr_del,
+ .intersects = amdgpu_dummy_vram_mgr_intersects,
+ .compatible = amdgpu_dummy_vram_mgr_compatible,
+ .debug = amdgpu_dummy_vram_mgr_debug
+};
+
+static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
+ .alloc = amdgpu_vram_mgr_new,
+ .free = amdgpu_vram_mgr_del,
+ .intersects = amdgpu_vram_mgr_intersects,
+ .compatible = amdgpu_vram_mgr_compatible,
+ .debug = amdgpu_vram_mgr_debug
+};
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev,
+ adev->gmc.real_vram_size);
+
+ mutex_init(&mgr->lock);
+ INIT_LIST_HEAD(&mgr->reservations_pending);
+ INIT_LIST_HEAD(&mgr->reserved_pages);
+ mgr->default_page_size = PAGE_SIZE;
+
+ if (!adev->gmc.is_app_apu) {
+ man->func = &amdgpu_vram_mgr_func;
+
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+ } else {
+ man->func = &amdgpu_dummy_vram_mgr_func;
+ DRM_INFO("Setup dummy vram mgr\n");
+ }
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+ struct amdgpu_vram_reservation *rsv, *temp;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
+ kfree(rsv);
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+ kfree(rsv);
+ }
+ if (!adev->gmc.is_app_apu)
+ drm_buddy_fini(&mgr->mm);
+ mutex_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000..0e04e42cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_buddy mm;
+ /* protects access to buffer objects */
+ struct mutex lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+ u64 default_page_size;
+};
+
+struct amdgpu_vram_mgr_resource {
+ struct ttm_resource base;
+ struct list_head blocks;
+ unsigned long flags;
+};
+
+static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct amdgpu_vram_mgr_resource *
+to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct amdgpu_vram_mgr_resource, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
new file mode 100644
index 000000000..565a1fa43
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_drv.h"
+
+#include <drm/drm_drv.h>
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
+{
+ int (*run_func)(void *handle, uint32_t inst_mask);
+ int ret = 0;
+
+ if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
+ return 0;
+
+ run_func = NULL;
+
+ switch (xcp_state) {
+ case AMDGPU_XCP_PREPARE_SUSPEND:
+ run_func = xcp_ip->ip_funcs->prepare_suspend;
+ break;
+ case AMDGPU_XCP_SUSPEND:
+ run_func = xcp_ip->ip_funcs->suspend;
+ break;
+ case AMDGPU_XCP_PREPARE_RESUME:
+ run_func = xcp_ip->ip_funcs->prepare_resume;
+ break;
+ case AMDGPU_XCP_RESUME:
+ run_func = xcp_ip->ip_funcs->resume;
+ break;
+ }
+
+ if (run_func)
+ ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
+
+ return ret;
+}
+
+static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ int state)
+{
+ struct amdgpu_xcp_ip *xcp_ip;
+ struct amdgpu_xcp *xcp;
+ int i, ret;
+
+ if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
+ return -EINVAL;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
+ xcp_ip = &xcp->ip[i];
+ ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_SUSPEND);
+}
+
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
+}
+
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_RESUME);
+}
+
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
+}
+
+static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_xcp *xcp;
+
+ if (!ip)
+ return;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ xcp->ip[ip->ip_id] = *ip;
+ xcp->ip[ip->ip_id].valid = true;
+
+ xcp->valid = true;
+}
+
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ struct amdgpu_xcp_ip ip;
+ uint8_t mem_id;
+ int i, j, ret;
+
+ if (!num_xcps || num_xcps > MAX_XCP)
+ return -EINVAL;
+
+ xcp_mgr->mode = mode;
+
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].valid = false;
+
+ /* This is needed for figuring out memory id of xcp */
+ xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < num_xcps; ++i) {
+ for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
+ ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
+ &ip);
+ if (ret)
+ continue;
+
+ __amdgpu_xcp_add_block(xcp_mgr, i, &ip);
+ }
+
+ xcp_mgr->xcp[i].id = i;
+
+ if (xcp_mgr->funcs->get_xcp_mem_id) {
+ ret = xcp_mgr->funcs->get_xcp_mem_id(
+ xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+ if (ret)
+ continue;
+ else
+ xcp_mgr->xcp[i].mem_id = mem_id;
+ }
+ }
+
+ xcp_mgr->num_xcps = num_xcps;
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int ret, curr_mode, num_xcps = 0;
+
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
+
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
+ return 0;
+
+ mutex_lock(&xcp_mgr->xcp_lock);
+
+ curr_mode = xcp_mgr->mode;
+ /* State set to transient mode */
+ xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
+ ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
+
+ if (ret) {
+ /* Failed, get whatever mode it's at now */
+ if (xcp_mgr->funcs->query_partition_mode)
+ xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+ xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+ else
+ xcp_mgr->mode = curr_mode;
+
+ goto out;
+ }
+
+out:
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return ret;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return xcp_mgr->mode;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return xcp_mgr->mode;
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_lock(&xcp_mgr->xcp_lock);
+ mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
+ dev_WARN(
+ xcp_mgr->adev->dev,
+ "Cached partition mode %d not matching with device mode %d",
+ xcp_mgr->mode, mode);
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return mode;
+}
+
+static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ struct drm_device *ddev;
+ int i, ret;
+
+ ddev = adev_to_drm(adev);
+
+ /* xcp #0 shares drm device setting with adev */
+ adev->xcp_mgr->xcp->ddev = ddev;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
+ if (ret == -ENOSPC) {
+ dev_warn(adev->dev,
+ "Skip xcp node #%d when out of drm node resource.", i);
+ return 0;
+ } else if (ret) {
+ return ret;
+ }
+
+ /* Redirect all IOCTLs to the primary device */
+ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
+ adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
+ adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
+ adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
+ p_ddev->render->dev = ddev;
+ p_ddev->primary->dev = ddev;
+ p_ddev->vma_offset_manager = ddev->vma_offset_manager;
+ p_ddev->driver = &amdgpu_partition_driver;
+ adev->xcp_mgr->xcp[i].ddev = p_ddev;
+ }
+
+ return 0;
+}
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_num_xcps,
+ struct amdgpu_xcp_mgr_funcs *xcp_funcs)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr;
+
+ if (!xcp_funcs || !xcp_funcs->switch_partition_mode ||
+ !xcp_funcs->get_ip_details)
+ return -EINVAL;
+
+ xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
+
+ if (!xcp_mgr)
+ return -ENOMEM;
+
+ xcp_mgr->adev = adev;
+ xcp_mgr->funcs = xcp_funcs;
+ xcp_mgr->mode = init_mode;
+ mutex_init(&xcp_mgr->xcp_lock);
+
+ if (init_mode != AMDGPU_XCP_MODE_NONE)
+ amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
+
+ adev->xcp_mgr = xcp_mgr;
+
+ return amdgpu_xcp_dev_alloc(adev);
+}
+
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance)
+{
+ struct amdgpu_xcp *xcp;
+ int i, id_mask = 0;
+
+ if (ip >= AMDGPU_XCP_MAX_BLOCKS)
+ return -EINVAL;
+
+ for (i = 0; i < xcp_mgr->num_xcps; ++i) {
+ xcp = &xcp_mgr->xcp[i];
+ if ((xcp->valid) && (xcp->ip[ip].valid) &&
+ (xcp->ip[ip].inst_mask & BIT(instance)))
+ id_mask |= BIT(i);
+ }
+
+ if (!id_mask)
+ id_mask = -ENXIO;
+
+ return id_mask;
+}
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask)
+{
+ if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
+ return -EINVAL;
+
+ *inst_mask = xcp->ip[ip].inst_mask;
+
+ return 0;
+}
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent)
+{
+ int i, ret;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ int i;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ p_ddev = adev->xcp_mgr->xcp[i].ddev;
+ drm_dev_unplug(p_ddev);
+ p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
+ p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
+ p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
+ p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ }
+}
+
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv)
+{
+ int i;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ for (i = 0; i < MAX_XCP; ++i) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
+ if (adev->xcp_mgr->xcp[i].valid == FALSE) {
+ dev_err(adev->dev, "renderD%d partition %d not valid!",
+ file_priv->minor->index, i);
+ return -ENOENT;
+ }
+ dev_dbg(adev->dev, "renderD%d partition %d opened!",
+ file_priv->minor->index, i);
+ fpriv->xcp_id = i;
+ break;
+ }
+ }
+
+ fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
+ adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
+ return 0;
+}
+
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ struct drm_gpu_scheduler *sched;
+ struct amdgpu_ring *ring;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ sched = entity->entity.rq->sched;
+ if (sched->ready) {
+ ring = to_amdgpu_ring(entity->entity.rq->sched);
+ atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
new file mode 100644
index 000000000..9a1036aee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_XCP_H
+#define AMDGPU_XCP_H
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "amdgpu_ctx.h"
+
+#define MAX_XCP 8
+
+#define AMDGPU_XCP_MODE_NONE -1
+#define AMDGPU_XCP_MODE_TRANS -2
+
+#define AMDGPU_XCP_FL_NONE 0
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
+struct amdgpu_fpriv;
+
+enum AMDGPU_XCP_IP_BLOCK {
+ AMDGPU_XCP_GFXHUB,
+ AMDGPU_XCP_GFX,
+ AMDGPU_XCP_SDMA,
+ AMDGPU_XCP_VCN,
+ AMDGPU_XCP_MAX_BLOCKS
+};
+
+enum AMDGPU_XCP_STATE {
+ AMDGPU_XCP_PREPARE_SUSPEND,
+ AMDGPU_XCP_SUSPEND,
+ AMDGPU_XCP_PREPARE_RESUME,
+ AMDGPU_XCP_RESUME,
+};
+
+struct amdgpu_xcp_ip_funcs {
+ int (*prepare_suspend)(void *handle, uint32_t inst_mask);
+ int (*suspend)(void *handle, uint32_t inst_mask);
+ int (*prepare_resume)(void *handle, uint32_t inst_mask);
+ int (*resume)(void *handle, uint32_t inst_mask);
+};
+
+struct amdgpu_xcp_ip {
+ struct amdgpu_xcp_ip_funcs *ip_funcs;
+ uint32_t inst_mask;
+
+ enum AMDGPU_XCP_IP_BLOCK ip_id;
+ bool valid;
+};
+
+struct amdgpu_xcp {
+ struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
+
+ uint8_t id;
+ uint8_t mem_id;
+ bool valid;
+ atomic_t ref_cnt;
+ struct drm_device *ddev;
+ struct drm_device *rdev;
+ struct drm_device *pdev;
+ struct drm_driver *driver;
+ struct drm_vma_offset_manager *vma_offset_manager;
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+};
+
+struct amdgpu_xcp_mgr {
+ struct amdgpu_device *adev;
+ struct mutex xcp_lock;
+ struct amdgpu_xcp_mgr_funcs *funcs;
+
+ struct amdgpu_xcp xcp[MAX_XCP];
+ uint8_t num_xcps;
+ int8_t mode;
+
+ /* Used to determine KFD memory size limits per XCP */
+ unsigned int num_xcp_per_mem_partition;
+};
+
+struct amdgpu_xcp_mgr_funcs {
+ int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
+ int *num_xcps);
+ int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
+ int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip);
+ int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
+
+ int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*select_scheds)(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds);
+ int (*update_partition_sched_list)(struct amdgpu_device *adev);
+};
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance);
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask);
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent);
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv);
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity);
+
+#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
+ ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
+ (adev)->xcp_mgr->funcs->select_scheds ? \
+ (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT)
+#define amdgpu_xcp_update_partition_sched_list(adev) \
+ ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
+ (adev)->xcp_mgr->funcs->update_partition_sched_list ? \
+ (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0)
+
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr)
+ return 1;
+ else
+ return xcp_mgr->num_xcps;
+}
+
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+ if (!xcp_mgr)
+ return NULL;
+
+ while (*from < MAX_XCP) {
+ if (xcp_mgr->xcp[*from].valid)
+ return &xcp_mgr->xcp[*from];
+ ++(*from);
+ }
+
+ return NULL;
+}
+
+#define for_each_xcp(xcp_mgr, xcp, i) \
+ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+ xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
new file mode 100644
index 000000000..7e91b2478
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -0,0 +1,1125 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
+#include "soc15.h"
+#include "df/df_3_6_offset.h"
+#include "xgmi/xgmi_4_0_0_smn.h"
+#include "xgmi/xgmi_4_0_0_sh_mask.h"
+#include "xgmi/xgmi_6_1_0_sh_mask.h"
+#include "wafl/wafl2_4_0_0_smn.h"
+#include "wafl/wafl2_4_0_0_sh_mask.h"
+
+#include "amdgpu_reset.h"
+
+#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
+#define smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK 0x11a00218
+#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
+#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+
+static DEFINE_MUTEX(xgmi_mutex);
+
+#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
+
+static LIST_HEAD(xgmi_hive_list);
+
+static const int xgmi_pcs_err_status_reg_vg20[] = {
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int wafl_pcs_err_status_reg_vg20[] = {
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int xgmi_pcs_err_status_reg_arct[] = {
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x100000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x500000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x600000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x700000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x800000,
+};
+
+/* same as vg20*/
+static const int wafl_pcs_err_status_reg_arct[] = {
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x700000
+};
+
+static const int walf_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
+ {"XGMI PCS DataLossErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI PCS TrainingErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI PCS CRCErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI PCS BERExceededErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI PCS TxMetaDataErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TxMetaDataErr)},
+ {"XGMI PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI PCS DataParityErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI PCS DeskewErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+};
+
+static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
+ {"WAFL PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataLossErr)},
+ {"WAFL PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TrainingErr)},
+ {"WAFL PCS CRCErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, CRCErr)},
+ {"WAFL PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, BERExceededErr)},
+ {"WAFL PCS TxMetaDataErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TxMetaDataErr)},
+ {"WAFL PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"WAFL PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataParityErr)},
+ {"WAFL PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"WAFL PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"WAFL PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"WAFL PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DeskewErr)},
+ {"WAFL PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"WAFL PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"WAFL PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"WAFL PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"WAFL PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"WAFL PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"WAFL PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+};
+
+static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
+ {"XGMI3X16 PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI3X16 PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI3X16 PCS FlowCtrlAckErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)},
+ {"XGMI3X16 PCS RxFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)},
+ {"XGMI3X16 PCS RxFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)},
+ {"XGMI3X16 PCS CRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI3X16 PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI3X16 PCS TxVcidDataErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)},
+ {"XGMI3X16 PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI3X16 PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI3X16 PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI3X16 PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI3X16 PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI3X16 PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI3X16 PCS FlowCtrlCRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)},
+ {"XGMI3X16 PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI3X16 PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI3X16 PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI3X16 PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI3X16 PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+ {"XGMI3X16 PCS ReplayAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)},
+ {"XGMI3X16 PCS SyncHdrErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)},
+ {"XGMI3X16 PCS TxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)},
+ {"XGMI3X16 PCS RxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubTxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubRxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)},
+ {"XGMI3X16 PCS RxCMDPktErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
+};
+
+/**
+ * DOC: AMDGPU XGMI Support
+ *
+ * XGMI is a high speed interconnect that joins multiple GPU cards
+ * into a homogeneous memory space that is organized by a collective
+ * hive ID and individual node IDs, both of which are 64-bit numbers.
+ *
+ * The file xgmi_device_id contains the unique per GPU device ID and
+ * is stored in the /sys/class/drm/card${cardno}/device/ directory.
+ *
+ * Inside the device directory a sub-directory 'xgmi_hive_info' is
+ * created which contains the hive ID and the list of nodes.
+ *
+ * The hive ID is stored in:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
+ *
+ * The node information is stored in numbered directories:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
+ *
+ * Each device has their own xgmi_hive_info direction with a mirror
+ * set of node sub-directories.
+ *
+ * The XGMI memory space is built by contiguously adding the power of
+ * two padded VRAM space from each node to each other.
+ *
+ */
+
+static struct attribute amdgpu_xgmi_hive_id = {
+ .name = "xgmi_hive_id",
+ .mode = S_IRUGO
+};
+
+static struct attribute *amdgpu_xgmi_hive_attrs[] = {
+ &amdgpu_xgmi_hive_id,
+ NULL
+};
+ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
+
+static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_hive_info *hive = container_of(
+ kobj, struct amdgpu_hive_info, kobj);
+
+ if (attr == &amdgpu_xgmi_hive_id)
+ return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id);
+
+ return 0;
+}
+
+static void amdgpu_xgmi_hive_release(struct kobject *kobj)
+{
+ struct amdgpu_hive_info *hive = container_of(
+ kobj, struct amdgpu_hive_info, kobj);
+
+ amdgpu_reset_put_reset_domain(hive->reset_domain);
+ hive->reset_domain = NULL;
+
+ mutex_destroy(&hive->hive_lock);
+ kfree(hive);
+}
+
+static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
+ .show = amdgpu_xgmi_show_attrs,
+};
+
+static const struct kobj_type amdgpu_xgmi_hive_type = {
+ .release = amdgpu_xgmi_hive_release,
+ .sysfs_ops = &amdgpu_xgmi_hive_ops,
+ .default_groups = amdgpu_xgmi_hive_groups,
+};
+
+static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id);
+
+}
+
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
+static ssize_t amdgpu_xgmi_show_error(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
+ uint64_t fica_out;
+ unsigned int error_count = 0;
+
+ ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
+ ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
+
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_fica) ||
+ (!adev->df.funcs->set_fica))
+ return -EINVAL;
+
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
+ if (fica_out != 0x1f)
+ pr_err("xGMI error counters not enabled!\n");
+
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
+
+ if ((fica_out & 0xffff) == 2)
+ error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
+
+ adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+
+ return sysfs_emit(buf, "%u\n", error_count);
+}
+
+
+static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+
+static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive)
+{
+ int ret = 0;
+ char node[10] = { 0 };
+
+ /* Create xgmi device id file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n");
+ return ret;
+ }
+
+ /* Create xgmi error file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
+ if (ret)
+ pr_err("failed to create xgmi_error\n");
+
+ /* Create xgmi num hops file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
+ if (ret)
+ pr_err("failed to create xgmi_num_hops\n");
+
+ /* Create xgmi num links file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (ret)
+ pr_err("failed to create xgmi_num_links\n");
+
+ /* Create sysfs link to hive info folder on the first device */
+ if (hive->kobj.parent != (&adev->dev->kobj)) {
+ ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
+ "xgmi_hive_info");
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create link to hive info");
+ goto remove_file;
+ }
+ }
+
+ sprintf(node, "node%d", atomic_read(&hive->number_devices));
+ /* Create sysfs link form the hive folder to yourself */
+ ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create link from hive info");
+ goto remove_link;
+ }
+
+ goto success;
+
+
+remove_link:
+ sysfs_remove_link(&adev->dev->kobj, adev_to_drm(adev)->unique);
+
+remove_file:
+ device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+
+success:
+ return ret;
+}
+
+static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive)
+{
+ char node[10];
+ memset(node, 0, sizeof(node));
+
+ device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+
+ if (hive->kobj.parent != (&adev->dev->kobj))
+ sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
+
+ sprintf(node, "node%d", atomic_read(&hive->number_devices));
+ sysfs_remove_link(&hive->kobj, node);
+
+}
+
+
+
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = NULL;
+ int ret;
+
+ if (!adev->gmc.xgmi.hive_id)
+ return NULL;
+
+ if (adev->hive) {
+ kobject_get(&adev->hive->kobj);
+ return adev->hive;
+ }
+
+ mutex_lock(&xgmi_mutex);
+
+ list_for_each_entry(hive, &xgmi_hive_list, node) {
+ if (hive->hive_id == adev->gmc.xgmi.hive_id)
+ goto pro_end;
+ }
+
+ hive = kzalloc(sizeof(*hive), GFP_KERNEL);
+ if (!hive) {
+ dev_err(adev->dev, "XGMI: allocation failed\n");
+ ret = -ENOMEM;
+ hive = NULL;
+ goto pro_end;
+ }
+
+ /* initialize new hive if not exist */
+ ret = kobject_init_and_add(&hive->kobj,
+ &amdgpu_xgmi_hive_type,
+ &adev->dev->kobj,
+ "%s", "xgmi_hive_info");
+ if (ret) {
+ dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+
+ /**
+ * Only init hive->reset_domain for none SRIOV configuration. For SRIOV,
+ * Host driver decide how to reset the GPU either through FLR or chain reset.
+ * Guest side will get individual notifications from the host for the FLR
+ * if necessary.
+ */
+ if (!amdgpu_sriov_vf(adev)) {
+ /**
+ * Avoid recreating reset domain when hive is reconstructed for the case
+ * of reset the devices in the XGMI hive during probe for passthrough GPU
+ * See https://www.spinics.net/lists/amd-gfx/msg58836.html
+ */
+ if (adev->reset_domain->type != XGMI_HIVE) {
+ hive->reset_domain =
+ amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+ if (!hive->reset_domain) {
+ dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
+ ret = -ENOMEM;
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+ } else {
+ amdgpu_reset_get_reset_domain(adev->reset_domain);
+ hive->reset_domain = adev->reset_domain;
+ }
+ }
+
+ hive->hive_id = adev->gmc.xgmi.hive_id;
+ INIT_LIST_HEAD(&hive->device_list);
+ INIT_LIST_HEAD(&hive->node);
+ mutex_init(&hive->hive_lock);
+ atomic_set(&hive->number_devices, 0);
+ task_barrier_init(&hive->tb);
+ hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
+ hive->hi_req_gpu = NULL;
+
+ /*
+ * hive pstate on boot is high in vega20 so we have to go to low
+ * pstate on after boot.
+ */
+ hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
+ list_add_tail(&hive->node, &xgmi_hive_list);
+
+pro_end:
+ if (hive)
+ kobject_get(&hive->kobj);
+ mutex_unlock(&xgmi_mutex);
+ return hive;
+}
+
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
+{
+ if (hive)
+ kobject_put(&hive->kobj);
+}
+
+int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
+{
+ int ret = 0;
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_device *request_adev;
+ bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
+ bool init_low;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ return 0;
+
+ request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev;
+ init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
+ amdgpu_put_xgmi_hive(hive);
+ /* fw bug so temporarily disable pstate switching */
+ return 0;
+
+ if (!hive || adev->asic_type != CHIP_VEGA20)
+ return 0;
+
+ mutex_lock(&hive->hive_lock);
+
+ if (is_hi_req)
+ hive->hi_req_count++;
+ else
+ hive->hi_req_count--;
+
+ /*
+ * Vega20 only needs single peer to request pstate high for the hive to
+ * go high but all peers must request pstate low for the hive to go low
+ */
+ if (hive->pstate == pstate ||
+ (!is_hi_req && hive->hi_req_count && !init_low))
+ goto out;
+
+ dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate);
+
+ ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
+ if (ret) {
+ dev_err(request_adev->dev,
+ "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
+ request_adev->gmc.xgmi.node_id,
+ request_adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ if (init_low)
+ hive->pstate = hive->hi_req_count ?
+ hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
+ else {
+ hive->pstate = pstate;
+ hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
+ adev : NULL;
+ }
+out:
+ mutex_unlock(&hive->hive_lock);
+ return ret;
+}
+
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Each psp need to set the latest topology */
+ ret = psp_xgmi_set_topology_info(&adev->psp,
+ atomic_read(&hive->number_devices),
+ &adev->psp.xgmi_context.top_info);
+ if (ret)
+ dev_err(adev->dev,
+ "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+
+ return ret;
+}
+
+
+/*
+ * NOTE psp_xgmi_node_info.num_hops layout is as follows:
+ * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved)
+ * num_hops[5:3] = reserved
+ * num_hops[2:0] = number of hops
+ */
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ uint8_t num_hops_mask = 0x7;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_hops & num_hops_mask;
+ return -EINVAL;
+}
+
+int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_links;
+ return -EINVAL;
+}
+
+/*
+ * Devices that support extended data require the entire hive to initialize with
+ * the shared memory buffer flag set.
+ *
+ * Hive locks and conditions apply - see amdgpu_xgmi_add_device
+ */
+static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive,
+ bool set_extended_data)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret;
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Failed to initialize xgmi session for data partition %i\n",
+ set_extended_data);
+ return ret;
+ }
+
+ }
+
+ return 0;
+}
+
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
+{
+ struct psp_xgmi_topology_info *top_info;
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_xgmi *entry;
+ struct amdgpu_device *tmp_adev = NULL;
+
+ int count = 0, ret = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
+ if (!adev->gmc.xgmi.pending_reset &&
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ ret = psp_xgmi_initialize(&adev->psp, false, true);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to initialize xgmi session\n");
+ return ret;
+ }
+
+ ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get hive id\n");
+ return ret;
+ }
+
+ ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get node id\n");
+ return ret;
+ }
+ } else {
+ adev->gmc.xgmi.hive_id = 16;
+ adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
+ }
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive) {
+ ret = -EINVAL;
+ dev_err(adev->dev,
+ "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
+ adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
+ goto exit;
+ }
+ mutex_lock(&hive->hive_lock);
+
+ top_info = &adev->psp.xgmi_context.top_info;
+
+ list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
+ list_for_each_entry(entry, &hive->device_list, head)
+ top_info->nodes[count++].node_id = entry->node_id;
+ top_info->num_nodes = count;
+ atomic_set(&hive->number_devices, count);
+
+ task_barrier_add_task(&hive->tb);
+
+ if (!adev->gmc.xgmi.pending_reset &&
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ /* update node list for other device in the hive */
+ if (tmp_adev != adev) {
+ top_info = &tmp_adev->psp.xgmi_context.top_info;
+ top_info->nodes[count - 1].node_id =
+ adev->gmc.xgmi.node_id;
+ top_info->num_nodes = count;
+ }
+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (ret)
+ goto exit_unlock;
+ }
+
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ }
+
+ /* get topology again for hives that support extended data */
+ if (adev->psp.xgmi_context.supports_extended_data) {
+
+ /* initialize the hive to get extended data. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true);
+ if (ret)
+ goto exit_unlock;
+
+ /* get the extended data. */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, true);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ goto exit_unlock;
+ }
+ }
+
+ /* initialize the hive to get non-extended data for the next round. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false);
+ if (ret)
+ goto exit_unlock;
+
+ }
+ }
+
+ if (!ret && !adev->gmc.xgmi.pending_reset)
+ ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
+
+exit_unlock:
+ mutex_unlock(&hive->hive_lock);
+exit:
+ if (!ret) {
+ adev->hive = hive;
+ dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
+ adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
+ } else {
+ amdgpu_put_xgmi_hive(hive);
+ dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",
+ adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
+ ret);
+ }
+
+ return ret;
+}
+
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = adev->hive;
+
+ if (!adev->gmc.xgmi.supported)
+ return -EINVAL;
+
+ if (!hive)
+ return -EINVAL;
+
+ mutex_lock(&hive->hive_lock);
+ task_barrier_rem_task(&hive->tb);
+ amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
+ if (hive->hi_req_gpu == adev)
+ hive->hi_req_gpu = NULL;
+ list_del(&adev->gmc.xgmi.head);
+ mutex_unlock(&hive->hive_lock);
+
+ amdgpu_put_xgmi_hive(hive);
+ adev->hive = NULL;
+
+ if (atomic_dec_return(&hive->number_devices) == 0) {
+ /* Remove the hive from global hive list */
+ mutex_lock(&xgmi_mutex);
+ list_del(&hive->node);
+ mutex_unlock(&xgmi_mutex);
+
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ return 0;
+}
+
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+
+ return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
+uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi;
+ return (addr + xgmi->physical_node_id * xgmi->node_segment_size);
+}
+
+static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
+{
+ WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF);
+ WREG32_PCIE(pcs_status_reg, 0);
+}
+
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_arct[i]);
+ break;
+ case CHIP_VEGA20:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_vg20[i]);
+ break;
+ case CHIP_ALDEBARAN:
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ walf_pcs_err_status_reg_aldebaran[i]);
+ break;
+ default:
+ break;
+ }
+}
+
+static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
+ uint32_t value,
+ uint32_t mask_value,
+ uint32_t *ue_count,
+ uint32_t *ce_count,
+ bool is_xgmi_pcs,
+ bool check_mask)
+{
+ int i;
+ int ue_cnt = 0;
+ const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL;
+ uint32_t field_array_size = 0;
+
+ if (is_xgmi_pcs) {
+ if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+ pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
+ } else {
+ pcs_ras_fields = &xgmi_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
+ }
+ } else {
+ pcs_ras_fields = &wafl_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields);
+ }
+
+ if (check_mask)
+ value = value & ~mask_value;
+
+ /* query xgmi/walf pcs error status,
+ * only ue is supported */
+ for (i = 0; value && i < field_array_size; i++) {
+ ue_cnt = (value &
+ pcs_ras_fields[i].pcs_err_mask) >>
+ pcs_ras_fields[i].pcs_err_shift;
+ if (ue_cnt) {
+ dev_info(adev->dev, "%s detected\n",
+ pcs_ras_fields[i].err_name);
+ *ue_count += ue_cnt;
+ }
+
+ /* reset bit value if the bit is checked */
+ value &= ~(pcs_ras_fields[i].pcs_err_mask);
+ }
+
+ return 0;
+}
+
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ int i;
+ uint32_t data, mask_data = 0;
+ uint32_t ue_cnt = 0, ce_cnt = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
+ return ;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ /* check xgmi pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) {
+ data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct); i++) {
+ data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
+ }
+ break;
+ case CHIP_VEGA20:
+ /* check xgmi pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) {
+ data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20); i++) {
+ data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
+ }
+ break;
+ case CHIP_ALDEBARAN:
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(walf_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, true);
+ }
+ break;
+ default:
+ dev_warn(adev->dev, "XGMI RAS error query not supported");
+ break;
+ }
+
+ adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+
+ err_data->ue_count += ue_cnt;
+ err_data->ce_count += ce_cnt;
+}
+
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
+{
+ int ret = 0;
+ struct ta_ras_trigger_error_input *block_info =
+ (struct ta_ras_trigger_error_input *)inject_if;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret;
+}
+
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
+ .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
+ .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+ .ras_error_inject = amdgpu_ras_error_inject_xgmi,
+};
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+ .ras_block = {
+ .hw_ops = &xgmi_ras_hw_ops,
+ .ras_late_init = amdgpu_xgmi_ras_late_init,
+ },
+};
+
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_xgmi_ras *ras;
+
+ if (!adev->gmc.xgmi.ras)
+ return 0;
+
+ ras = adev->gmc.xgmi.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
new file mode 100644
index 000000000..86fbf5693
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_XGMI_H__
+#define __AMDGPU_XGMI_H__
+
+#include <drm/task_barrier.h>
+#include "amdgpu_psp.h"
+#include "amdgpu_ras.h"
+
+struct amdgpu_hive_info {
+ struct kobject kobj;
+ uint64_t hive_id;
+ struct list_head device_list;
+ struct list_head node;
+ atomic_t number_devices;
+ struct mutex hive_lock;
+ int hi_req_count;
+ struct amdgpu_device *hi_req_gpu;
+ struct task_barrier tb;
+ enum {
+ AMDGPU_XGMI_PSTATE_MIN,
+ AMDGPU_XGMI_PSTATE_MAX_VEGA20,
+ AMDGPU_XGMI_PSTATE_UNKNOWN
+ } pstate;
+
+ struct amdgpu_reset_domain *reset_domain;
+ uint32_t device_remove_count;
+};
+
+struct amdgpu_pcs_ras_field {
+ const char *err_name;
+ uint32_t pcs_err_mask;
+ uint32_t pcs_err_shift;
+};
+
+extern struct amdgpu_xgmi_ras xgmi_ras;
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
+int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
+ uint64_t addr);
+static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev)
+{
+ return (amdgpu_use_xgmi_p2p &&
+ adev != bo_adev &&
+ adev->gmc.xgmi.hive_id &&
+ adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
+}
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
new file mode 100644
index 000000000..104a5ad83
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef AMDGV_SRIOV_MSG__H_
+#define AMDGV_SRIOV_MSG__H_
+
+/* unit in kilobytes */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
+
+/*
+ * layout
+ * 0 64KB 65KB 66KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | ...
+ * | 64KB | 1KB | 1KB |
+ */
+#define AMD_SRIOV_MSG_SIZE_KB 1
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+
+/*
+ * PF2VF history log:
+ * v1 defined in amdgim
+ * v2 current
+ *
+ * VF2PF history log:
+ * v1 defined in amdgim
+ * v2 defined in amdgim
+ * v3 current
+ */
+#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
+#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+
+#define AMD_SRIOV_MSG_RESERVE_UCODE 24
+
+#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4
+
+enum amd_sriov_ucode_engine_id {
+ AMD_SRIOV_UCODE_ID_VCE = 0,
+ AMD_SRIOV_UCODE_ID_UVD,
+ AMD_SRIOV_UCODE_ID_MC,
+ AMD_SRIOV_UCODE_ID_ME,
+ AMD_SRIOV_UCODE_ID_PFP,
+ AMD_SRIOV_UCODE_ID_CE,
+ AMD_SRIOV_UCODE_ID_RLC,
+ AMD_SRIOV_UCODE_ID_RLC_SRLC,
+ AMD_SRIOV_UCODE_ID_RLC_SRLG,
+ AMD_SRIOV_UCODE_ID_RLC_SRLS,
+ AMD_SRIOV_UCODE_ID_MEC,
+ AMD_SRIOV_UCODE_ID_MEC2,
+ AMD_SRIOV_UCODE_ID_SOS,
+ AMD_SRIOV_UCODE_ID_ASD,
+ AMD_SRIOV_UCODE_ID_TA_RAS,
+ AMD_SRIOV_UCODE_ID_TA_XGMI,
+ AMD_SRIOV_UCODE_ID_SMC,
+ AMD_SRIOV_UCODE_ID_SDMA,
+ AMD_SRIOV_UCODE_ID_SDMA2,
+ AMD_SRIOV_UCODE_ID_VCN,
+ AMD_SRIOV_UCODE_ID_DMCU,
+ AMD_SRIOV_UCODE_ID__MAX
+};
+
+#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
+
+union amd_sriov_msg_feature_flags {
+ struct {
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t reserved : 25;
+ } flags;
+ uint32_t all;
+};
+
+union amd_sriov_reg_access_flags {
+ struct {
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t reserved : 29;
+ } flags;
+ uint32_t all;
+};
+
+union amd_sriov_msg_os_info {
+ struct {
+ uint32_t windows : 1;
+ uint32_t reserved : 31;
+ } info;
+ uint32_t all;
+};
+
+struct amd_sriov_msg_uuid_info {
+ union {
+ struct {
+ uint32_t did : 16;
+ uint32_t fcn : 8;
+ uint32_t asic_7 : 8;
+ };
+ uint32_t time_low;
+ };
+
+ struct {
+ uint32_t time_mid : 16;
+ uint32_t time_high : 12;
+ uint32_t version : 4;
+ };
+
+ struct {
+ struct {
+ uint8_t clk_seq_hi : 6;
+ uint8_t variant : 2;
+ };
+ union {
+ uint8_t clk_seq_low;
+ uint8_t asic_6;
+ };
+ uint16_t asic_4;
+ };
+
+ uint32_t asic_0;
+};
+
+struct amd_sriov_msg_pf2vf_info_header {
+ /* the total structure size in byte */
+ uint32_t size;
+ /* version of this structure, written by the HOST */
+ uint32_t version;
+ /* reserved */
+ uint32_t reserved[2];
+};
+
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48)
+struct amd_sriov_msg_pf2vf_info {
+ /* header contains size and version */
+ struct amd_sriov_msg_pf2vf_info_header header;
+ /* use private key from mailbox 2 to create checksum */
+ uint32_t checksum;
+ /* The features flags of the HOST driver supports */
+ union amd_sriov_msg_feature_flags feature_flags;
+ /* (max_width * max_height * fps) / (16 * 16) */
+ uint32_t hevc_enc_max_mb_per_second;
+ /* (max_width * max_height) / (16 * 16) */
+ uint32_t hevc_enc_max_mb_per_frame;
+ /* (max_width * max_height * fps) / (16 * 16) */
+ uint32_t avc_enc_max_mb_per_second;
+ /* (max_width * max_height) / (16 * 16) */
+ uint32_t avc_enc_max_mb_per_frame;
+ /* MEC FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t mecfw_offset;
+ /* MEC FW size in BYTE */
+ uint32_t mecfw_size;
+ /* UVD FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t uvdfw_offset;
+ /* UVD FW size in BYTE */
+ uint32_t uvdfw_size;
+ /* VCE FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t vcefw_offset;
+ /* VCE FW size in BYTE */
+ uint32_t vcefw_size;
+ /* Bad pages block position in BYTE */
+ uint32_t bp_block_offset_low;
+ uint32_t bp_block_offset_high;
+ /* Bad pages block size in BYTE */
+ uint32_t bp_block_size;
+ /* frequency for VF to update the VF2PF area in msec, 0 = manual */
+ uint32_t vf2pf_update_interval_ms;
+ /* identification in ROCm SMI */
+ uint64_t uuid;
+ uint32_t fcn_idx;
+ /* flags to indicate which register access method VF should use */
+ union amd_sriov_reg_access_flags reg_access_flags;
+ /* MM BW management */
+ struct {
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+ } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
+ /* UUID info */
+ struct amd_sriov_msg_uuid_info uuid_info;
+ /* PCIE atomic ops support flag */
+ uint32_t pcie_atomic_ops_support_flags;
+ /* reserved */
+ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
+};
+
+struct amd_sriov_msg_vf2pf_info_header {
+ /* the total structure size in byte */
+ uint32_t size;
+ /* version of this structure, written by the guest */
+ uint32_t version;
+ /* reserved */
+ uint32_t reserved[2];
+};
+
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70)
+struct amd_sriov_msg_vf2pf_info {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ uint32_t checksum;
+ /* driver version */
+ uint8_t driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ uint32_t driver_cert;
+ /* guest OS type and version */
+ union amd_sriov_msg_os_info os_info;
+ /* guest fb information in the unit of MB */
+ uint32_t fb_usage;
+ /* guest gfx engine usage percentage */
+ uint32_t gfx_usage;
+ /* guest gfx engine health percentage */
+ uint32_t gfx_health;
+ /* guest compute engine usage percentage */
+ uint32_t compute_usage;
+ /* guest compute engine health percentage */
+ uint32_t compute_health;
+ /* guest avc engine usage percentage. 0xffff means N/A */
+ uint32_t avc_enc_usage;
+ /* guest avc engine health percentage. 0xffff means N/A */
+ uint32_t avc_enc_health;
+ /* guest hevc engine usage percentage. 0xffff means N/A */
+ uint32_t hevc_enc_usage;
+ /* guest hevc engine usage percentage. 0xffff means N/A */
+ uint32_t hevc_enc_health;
+ /* combined encode/decode usage */
+ uint32_t encode_usage;
+ uint32_t decode_usage;
+ /* Version of PF2VF that VF understands */
+ uint32_t pf2vf_version_required;
+ /* additional FB usage */
+ uint32_t fb_vis_usage;
+ uint32_t fb_vis_size;
+ uint32_t fb_size;
+ /* guest ucode data, each one is 1.25 Dword */
+ struct {
+ uint8_t id;
+ uint32_t version;
+ } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
+ uint64_t dummy_page_addr;
+
+ /* reserved */
+ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
+};
+
+/* mailbox message send from guest to host */
+enum amd_sriov_mailbox_request_message {
+ MB_REQ_MSG_REQ_GPU_INIT_ACCESS = 1,
+ MB_REQ_MSG_REL_GPU_INIT_ACCESS,
+ MB_REQ_MSG_REQ_GPU_FINI_ACCESS,
+ MB_REQ_MSG_REL_GPU_FINI_ACCESS,
+ MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
+ MB_REQ_MSG_REQ_GPU_INIT_DATA,
+
+ MB_REQ_MSG_LOG_VF_ERROR = 200,
+};
+
+/* mailbox message send from host to guest */
+enum amd_sriov_mailbox_response_message {
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
+ MB_RES_MSG_SUCCESS,
+ MB_RES_MSG_FAIL,
+ MB_RES_MSG_QUERY_ALIVE,
+ MB_RES_MSG_GPU_INIT_DATA_READY,
+
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
+enum amd_sriov_gpu_init_data_version {
+ GPU_INIT_DATA_READY_V1 = 1,
+};
+
+#pragma pack(pop) // Restore previous packing option
+
+/* checksum function between host and guest */
+unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
+ unsigned int checksum);
+
+/* assertion at compile time */
+#ifdef __linux__
+#define stringification(s) _stringification(s)
+#define _stringification(s) #s
+
+_Static_assert(
+ sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
+ "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+
+_Static_assert(
+ sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
+ "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
+
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+
+#undef _stringification
+#undef stringification
+#endif
+
+#endif /* AMDGV_SRIOV_MSG__H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
new file mode 100644
index 000000000..d0fc62784
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "amdgpu_xcp.h"
+#include "gfx_v9_4_3.h"
+#include "gfxhub_v1_2.h"
+#include "sdma_v4_4_2.h"
+
+#define XCP_INST_MASK(num_inst, xcp_id) \
+ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
+
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
+
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
+
+ adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
+ adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
+
+ adev->doorbell_index.sdma_doorbell_range = 20;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->doorbell_index.sdma_engine[i] =
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
+ i * (adev->doorbell_index.sdma_doorbell_range >> 1);
+
+ adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
+}
+
+static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
+ uint32_t inst_idx, struct amdgpu_ring *ring)
+{
+ int xcp_id;
+ enum AMDGPU_XCP_IP_BLOCK ip_blk;
+ uint32_t inst_mask;
+
+ ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return;
+
+ inst_mask = 1 << inst_idx;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ ip_blk = AMDGPU_XCP_GFX;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ip_blk = AMDGPU_XCP_SDMA;
+ break;
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ ip_blk = AMDGPU_XCP_VCN;
+ if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ inst_mask = 1 << (inst_idx * 2);
+ break;
+ default:
+ DRM_ERROR("Not support ring type %d!", ring->funcs->type);
+ return;
+ }
+
+ for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
+ if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
+ ring->xcp_id = xcp_id;
+ break;
+ }
+ }
+}
+
+static void aqua_vanjaram_xcp_gpu_sched_update(
+ struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int sel_xcp_id)
+{
+ unsigned int *num_gpu_sched;
+
+ num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
+ .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
+ adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
+ .sched[(*num_gpu_sched)++] = &ring->sched;
+ DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name,
+ sel_xcp_id, ring->funcs->type,
+ ring->hw_prio, *num_gpu_sched);
+}
+
+static int aqua_vanjaram_xcp_sched_list_update(
+ struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
+ memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
+ }
+
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ ring = adev->rings[i];
+ if (!ring || !ring->sched.ready || ring->no_scheduler)
+ continue;
+
+ aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+ /* VCN is shared by two partitions under CPX MODE */
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring);
+ else
+ aqua_vanjaram_set_xcp_id(adev, ring->me, ring);
+ }
+
+ return aqua_vanjaram_xcp_sched_list_update(adev);
+}
+
+static int aqua_vanjaram_select_scheds(
+ struct amdgpu_device *adev,
+ u32 hw_ip,
+ u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds)
+{
+ u32 sel_xcp_id;
+ int i;
+
+ if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
+ u32 least_ref_cnt = ~0;
+
+ fpriv->xcp_id = 0;
+ for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+ u32 total_ref_cnt;
+
+ total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt);
+ if (total_ref_cnt < least_ref_cnt) {
+ fpriv->xcp_id = i;
+ least_ref_cnt = total_ref_cnt;
+ }
+ }
+ }
+ sel_xcp_id = fpriv->xcp_id;
+
+ if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
+ *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
+ *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
+ atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+ DRM_DEBUG("Selected partition #%d", sel_xcp_id);
+ } else {
+ DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Both JPEG and VCN as JPEG is only alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For rest of the IPs, no look up required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask;
+}
+
+/* Fixed pattern for smn addressing on different AIDs:
+ * bit[34]: indicate cross AID access
+ * bit[33:32]: indicate target AID id
+ * AID id range is 0 ~ 3 as maximum AID number is 4.
+ */
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
+{
+ u64 ext_offset;
+
+ /* local routing and bit[34:32] will be zeros */
+ if (ext_id == 0)
+ return 0;
+
+ /* Initiated from host, accessing to all non-zero aids are cross traffic */
+ ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
+
+ return ext_offset;
+}
+
+static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ if (adev->nbio.funcs->get_compute_partition_mode)
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+
+ return mode;
+}
+
+static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int num_xcc, num_xcc_per_xcp = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_xcc_per_xcp = 1;
+ break;
+ }
+
+ return num_xcc_per_xcp;
+}
+
+static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+ int num_sdma, num_vcn;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma;
+ num_vcn_xcp = num_vcn;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 2;
+ num_vcn_xcp = num_vcn / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 3;
+ num_vcn_xcp = num_vcn / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_sdma_xcp = num_sdma / 4;
+ num_vcn_xcp = num_vcn / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = 2;
+ num_vcn_xcp = num_vcn ? 1 : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ if (adev->gmc.num_mem_partitions == 1)
+ return AMDGPU_SPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc / 2)
+ return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
+ AMDGPU_QPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
+ return AMDGPU_DPX_PARTITION_MODE;
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 3) &&
+ ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xccs_per_xcp = num_xcc / 4;
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 4) &&
+ (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ return ((num_xcc > 1) &&
+ (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
+ (num_xcc % adev->gmc.num_mem_partitions) == 0);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ /* TODO:
+ * Stop user queues and threads, and make sure GPU is empty of work.
+ */
+
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+ return 0;
+}
+
+static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+ amdgpu_amdkfd_device_init(xcp_mgr->adev);
+ /* If KFD init failed, return failure */
+ if (!xcp_mgr->adev->kfd.init_complete)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode, int *num_xcps)
+{
+ int num_xcc_per_xcp, num_xcc, ret;
+ struct amdgpu_device *adev;
+ u32 flags = 0;
+
+ adev = xcp_mgr->adev;
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+ mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+ } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+ dev_err(adev->dev,
+ "Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+ amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+
+ if (adev->kfd.init_complete)
+ flags |= AMDGPU_XCP_OPS_KFD;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+ if (ret)
+ goto out;
+ }
+
+ ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags);
+ if (ret)
+ goto unlock;
+
+ num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
+ if (adev->gfx.funcs->switch_partition_mode)
+ adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
+ num_xcc_per_xcp);
+
+ /* Init info about new xcps */
+ *num_xcps = num_xcc / num_xcc_per_xcp;
+ amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+ ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
+unlock:
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_unlock_kfd(adev);
+out:
+ return ret;
+}
+
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+ /* memory/spatial modes validation check is already done */
+ *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+ *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ if (!ip)
+ return -EINVAL;
+
+ return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
+}
+
+struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
+ .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
+ .query_partition_mode = &aqua_vanjaram_query_partition_mode,
+ .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
+ .select_scheds = &aqua_vanjaram_select_scheds,
+ .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
+};
+
+static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
+ &aqua_vanjaram_xcp_funcs);
+ if (ret)
+ return ret;
+
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
+{
+ u32 mask, inst_mask = adev->sdma.sdma_mask;
+ int ret, i;
+
+ /* generally 1 AID supports 4 instances */
+ adev->sdma.num_inst_per_aid = 4;
+ adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
+
+ adev->aid_mask = i = 1;
+ inst_mask >>= adev->sdma.num_inst_per_aid;
+
+ for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
+ inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
+ if ((inst_mask & mask) == mask)
+ adev->aid_mask |= (1 << i);
+ }
+
+ /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+ * addressed based on logical instance ids.
+ */
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_inst_per_aid = 1;
+ adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
+ adev->jpeg.harvest_config = 0;
+ adev->jpeg.num_inst_per_aid = 1;
+ adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
+
+ ret = aqua_vanjaram_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ aqua_vanjaram_ip_map_init(adev);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
new file mode 100644
index 000000000..fda99c958
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "arct_ip_offset.h"
+
+int arct_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the block needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i]));
+ adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i]));
+ adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
new file mode 100644
index 000000000..a13c443ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "athub_v1_0.h"
+
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if(def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(1, 5, 0):
+ athub_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
new file mode 100644
index 000000000..6be0a6704
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V1_0_H__
+#define __ATHUB_V1_0_H__
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
new file mode 100644
index 000000000..a9521c98e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_0.h"
+
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_default.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable)
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
+ return;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable)
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(1, 3, 1):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ athub_v2_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v2_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
new file mode 100644
index 000000000..8b763f6df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_0_H__
+#define __ATHUB_V2_0_H__
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
new file mode 100644
index 000000000..78508ae6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_1.h"
+
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if(def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ case IP_VERSION(2, 4, 0):
+ athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE);
+ athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
new file mode 100644
index 000000000..b799f14bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_1_H__
+#define __ATHUB_V2_1_H__
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
new file mode 100644
index 000000000..f0e235f98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v3_0.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
+static void
+athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 0, 2):
+ athub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v3_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
new file mode 100644
index 000000000..e08a7d564
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V3_0_H__
+#define __ATHUB_V3_0_H__
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
new file mode 100644
index 000000000..9f63ddb89
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -0,0 +1,1591 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
+
+#include <asm/unaligned.h>
+
+#include <drm/drm_util.h>
+
+#define ATOM_DEBUG
+
+#include "atomfirmware.h"
+#include "atom.h"
+#include "atom-names.h"
+#include "atom-bits.h"
+#include "amdgpu.h"
+
+#define ATOM_COND_ABOVE 0
+#define ATOM_COND_ABOVEOREQUAL 1
+#define ATOM_COND_ALWAYS 2
+#define ATOM_COND_BELOW 3
+#define ATOM_COND_BELOWOREQUAL 4
+#define ATOM_COND_EQUAL 5
+#define ATOM_COND_NOTEQUAL 6
+
+#define ATOM_PORT_ATI 0
+#define ATOM_PORT_PCI 1
+#define ATOM_PORT_SYSIO 2
+
+#define ATOM_UNIT_MICROSEC 0
+#define ATOM_UNIT_MILLISEC 1
+
+#define PLL_INDEX 2
+#define PLL_DATA 3
+
+#define ATOM_CMD_TIMEOUT_SEC 20
+
+typedef struct {
+ struct atom_context *ctx;
+ uint32_t *ps, *ws;
+ int ps_shift;
+ uint16_t start;
+ unsigned last_jump;
+ unsigned long last_jump_jiffies;
+ bool abort;
+} atom_exec_context;
+
+int amdgpu_atom_debug;
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+
+static uint32_t atom_arg_mask[8] =
+ { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
+ 0xFF000000 };
+static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };
+
+static int atom_dst_to_src[8][4] = {
+ /* translate destination alignment field to the source alignment encoding */
+ {0, 0, 0, 0},
+ {1, 2, 3, 0},
+ {1, 2, 3, 0},
+ {1, 2, 3, 0},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+};
+static int atom_def_dst[8] = { 0, 0, 1, 2, 0, 1, 2, 3 };
+
+static int debug_depth;
+#ifdef ATOM_DEBUG
+static void debug_print_spaces(int n)
+{
+ while (n--)
+ printk(" ");
+}
+
+#define DEBUG(...) do if (amdgpu_atom_debug) { printk(KERN_DEBUG __VA_ARGS__); } while (0)
+#define SDEBUG(...) do if (amdgpu_atom_debug) { printk(KERN_DEBUG); debug_print_spaces(debug_depth); printk(__VA_ARGS__); } while (0)
+#else
+#define DEBUG(...) do { } while (0)
+#define SDEBUG(...) do { } while (0)
+#endif
+
+static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
+ uint32_t index, uint32_t data)
+{
+ uint32_t temp = 0xCDCDCDCD;
+
+ while (1)
+ switch (CU8(base)) {
+ case ATOM_IIO_NOP:
+ base++;
+ break;
+ case ATOM_IIO_READ:
+ temp = ctx->card->reg_read(ctx->card, CU16(base + 1));
+ base += 3;
+ break;
+ case ATOM_IIO_WRITE:
+ ctx->card->reg_write(ctx->card, CU16(base + 1), temp);
+ base += 3;
+ break;
+ case ATOM_IIO_CLEAR:
+ temp &=
+ ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+ CU8(base + 2));
+ base += 3;
+ break;
+ case ATOM_IIO_SET:
+ temp |=
+ (0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base +
+ 2);
+ base += 3;
+ break;
+ case ATOM_IIO_MOVE_INDEX:
+ temp &=
+ ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+ CU8(base + 3));
+ temp |=
+ ((index >> CU8(base + 2)) &
+ (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
+ 3);
+ base += 4;
+ break;
+ case ATOM_IIO_MOVE_DATA:
+ temp &=
+ ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+ CU8(base + 3));
+ temp |=
+ ((data >> CU8(base + 2)) &
+ (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
+ 3);
+ base += 4;
+ break;
+ case ATOM_IIO_MOVE_ATTR:
+ temp &=
+ ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+ CU8(base + 3));
+ temp |=
+ ((ctx->
+ io_attr >> CU8(base + 2)) & (0xFFFFFFFF >> (32 -
+ CU8
+ (base
+ +
+ 1))))
+ << CU8(base + 3);
+ base += 4;
+ break;
+ case ATOM_IIO_END:
+ return temp;
+ default:
+ pr_info("Unknown IIO opcode\n");
+ return 0;
+ }
+}
+
+static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
+ int *ptr, uint32_t *saved, int print)
+{
+ uint32_t idx, val = 0xCDCDCDCD, align, arg;
+ struct atom_context *gctx = ctx->ctx;
+ arg = attr & 7;
+ align = (attr >> 3) & 7;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ if (print)
+ DEBUG("REG[0x%04X]", idx);
+ idx += gctx->reg_block;
+ switch (gctx->io_mode) {
+ case ATOM_IO_MM:
+ val = gctx->card->reg_read(gctx->card, idx);
+ break;
+ case ATOM_IO_PCI:
+ pr_info("PCI registers are not implemented\n");
+ return 0;
+ case ATOM_IO_SYSIO:
+ pr_info("SYSIO registers are not implemented\n");
+ return 0;
+ default:
+ if (!(gctx->io_mode & 0x80)) {
+ pr_info("Bad IO mode\n");
+ return 0;
+ }
+ if (!gctx->iio[gctx->io_mode & 0x7F]) {
+ pr_info("Undefined indirect IO read method %d\n",
+ gctx->io_mode & 0x7F);
+ return 0;
+ }
+ val =
+ atom_iio_execute(gctx,
+ gctx->iio[gctx->io_mode & 0x7F],
+ idx, 0);
+ }
+ break;
+ case ATOM_ARG_PS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ /* get_unaligned_le32 avoids unaligned accesses from atombios
+ * tables, noticed on a DEC Alpha. */
+ val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ if (print)
+ DEBUG("PS[0x%02X,0x%04X]", idx, val);
+ break;
+ case ATOM_ARG_WS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("WS[0x%02X]", idx);
+ switch (idx) {
+ case ATOM_WS_QUOTIENT:
+ val = gctx->divmul[0];
+ break;
+ case ATOM_WS_REMAINDER:
+ val = gctx->divmul[1];
+ break;
+ case ATOM_WS_DATAPTR:
+ val = gctx->data_block;
+ break;
+ case ATOM_WS_SHIFT:
+ val = gctx->shift;
+ break;
+ case ATOM_WS_OR_MASK:
+ val = 1 << gctx->shift;
+ break;
+ case ATOM_WS_AND_MASK:
+ val = ~(1 << gctx->shift);
+ break;
+ case ATOM_WS_FB_WINDOW:
+ val = gctx->fb_base;
+ break;
+ case ATOM_WS_ATTRIBUTES:
+ val = gctx->io_attr;
+ break;
+ case ATOM_WS_REGPTR:
+ val = gctx->reg_block;
+ break;
+ default:
+ val = ctx->ws[idx];
+ }
+ break;
+ case ATOM_ARG_ID:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ if (print) {
+ if (gctx->data_block)
+ DEBUG("ID[0x%04X+%04X]", idx, gctx->data_block);
+ else
+ DEBUG("ID[0x%04X]", idx);
+ }
+ val = U32(idx + gctx->data_block);
+ break;
+ case ATOM_ARG_FB:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) {
+ DRM_ERROR("ATOM: fb read beyond scratch region: %d vs. %d\n",
+ gctx->fb_base + (idx * 4), gctx->scratch_size_bytes);
+ val = 0;
+ } else
+ val = gctx->scratch[(gctx->fb_base / 4) + idx];
+ if (print)
+ DEBUG("FB[0x%02X]", idx);
+ break;
+ case ATOM_ARG_IMM:
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ val = U32(*ptr);
+ (*ptr) += 4;
+ if (print)
+ DEBUG("IMM 0x%08X\n", val);
+ return val;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ val = U16(*ptr);
+ (*ptr) += 2;
+ if (print)
+ DEBUG("IMM 0x%04X\n", val);
+ return val;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ val = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("IMM 0x%02X\n", val);
+ return val;
+ }
+ return 0;
+ case ATOM_ARG_PLL:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("PLL[0x%02X]", idx);
+ val = gctx->card->pll_read(gctx->card, idx);
+ break;
+ case ATOM_ARG_MC:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("MC[0x%02X]", idx);
+ val = gctx->card->mc_read(gctx->card, idx);
+ break;
+ }
+ if (saved)
+ *saved = val;
+ val &= atom_arg_mask[align];
+ val >>= atom_arg_shift[align];
+ if (print)
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ DEBUG(".[31:0] -> 0x%08X\n", val);
+ break;
+ case ATOM_SRC_WORD0:
+ DEBUG(".[15:0] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_WORD8:
+ DEBUG(".[23:8] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_WORD16:
+ DEBUG(".[31:16] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_BYTE0:
+ DEBUG(".[7:0] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE8:
+ DEBUG(".[15:8] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE16:
+ DEBUG(".[23:16] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE24:
+ DEBUG(".[31:24] -> 0x%02X\n", val);
+ break;
+ }
+ return val;
+}
+
+static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+ uint32_t align = (attr >> 3) & 7, arg = attr & 7;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ case ATOM_ARG_ID:
+ (*ptr) += 2;
+ break;
+ case ATOM_ARG_PLL:
+ case ATOM_ARG_MC:
+ case ATOM_ARG_PS:
+ case ATOM_ARG_WS:
+ case ATOM_ARG_FB:
+ (*ptr)++;
+ break;
+ case ATOM_ARG_IMM:
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ (*ptr) += 4;
+ return;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ (*ptr) += 2;
+ return;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ (*ptr)++;
+ return;
+ }
+ return;
+ }
+}
+
+static uint32_t atom_get_src(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+ return atom_get_src_int(ctx, attr, ptr, NULL, 1);
+}
+
+static uint32_t atom_get_src_direct(atom_exec_context *ctx, uint8_t align, int *ptr)
+{
+ uint32_t val = 0xCDCDCDCD;
+
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ val = U32(*ptr);
+ (*ptr) += 4;
+ break;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ val = U16(*ptr);
+ (*ptr) += 2;
+ break;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ val = U8(*ptr);
+ (*ptr)++;
+ break;
+ }
+ return val;
+}
+
+static uint32_t atom_get_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+ int *ptr, uint32_t *saved, int print)
+{
+ return atom_get_src_int(ctx,
+ arg | atom_dst_to_src[(attr >> 3) &
+ 7][(attr >> 6) & 3] << 3,
+ ptr, saved, print);
+}
+
+static void atom_skip_dst(atom_exec_context *ctx, int arg, uint8_t attr, int *ptr)
+{
+ atom_skip_src_int(ctx,
+ arg | atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) &
+ 3] << 3, ptr);
+}
+
+static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+ int *ptr, uint32_t val, uint32_t saved)
+{
+ uint32_t align =
+ atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3], old_val =
+ val, idx;
+ struct atom_context *gctx = ctx->ctx;
+ old_val &= atom_arg_mask[align] >> atom_arg_shift[align];
+ val <<= atom_arg_shift[align];
+ val &= atom_arg_mask[align];
+ saved &= ~atom_arg_mask[align];
+ val |= saved;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ DEBUG("REG[0x%04X]", idx);
+ idx += gctx->reg_block;
+ switch (gctx->io_mode) {
+ case ATOM_IO_MM:
+ if (idx == 0)
+ gctx->card->reg_write(gctx->card, idx,
+ val << 2);
+ else
+ gctx->card->reg_write(gctx->card, idx, val);
+ break;
+ case ATOM_IO_PCI:
+ pr_info("PCI registers are not implemented\n");
+ return;
+ case ATOM_IO_SYSIO:
+ pr_info("SYSIO registers are not implemented\n");
+ return;
+ default:
+ if (!(gctx->io_mode & 0x80)) {
+ pr_info("Bad IO mode\n");
+ return;
+ }
+ if (!gctx->iio[gctx->io_mode & 0xFF]) {
+ pr_info("Undefined indirect IO write method %d\n",
+ gctx->io_mode & 0x7F);
+ return;
+ }
+ atom_iio_execute(gctx, gctx->iio[gctx->io_mode & 0xFF],
+ idx, val);
+ }
+ break;
+ case ATOM_ARG_PS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("PS[0x%02X]", idx);
+ ctx->ps[idx] = cpu_to_le32(val);
+ break;
+ case ATOM_ARG_WS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("WS[0x%02X]", idx);
+ switch (idx) {
+ case ATOM_WS_QUOTIENT:
+ gctx->divmul[0] = val;
+ break;
+ case ATOM_WS_REMAINDER:
+ gctx->divmul[1] = val;
+ break;
+ case ATOM_WS_DATAPTR:
+ gctx->data_block = val;
+ break;
+ case ATOM_WS_SHIFT:
+ gctx->shift = val;
+ break;
+ case ATOM_WS_OR_MASK:
+ case ATOM_WS_AND_MASK:
+ break;
+ case ATOM_WS_FB_WINDOW:
+ gctx->fb_base = val;
+ break;
+ case ATOM_WS_ATTRIBUTES:
+ gctx->io_attr = val;
+ break;
+ case ATOM_WS_REGPTR:
+ gctx->reg_block = val;
+ break;
+ default:
+ ctx->ws[idx] = val;
+ }
+ break;
+ case ATOM_ARG_FB:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) {
+ DRM_ERROR("ATOM: fb write beyond scratch region: %d vs. %d\n",
+ gctx->fb_base + (idx * 4), gctx->scratch_size_bytes);
+ } else
+ gctx->scratch[(gctx->fb_base / 4) + idx] = val;
+ DEBUG("FB[0x%02X]", idx);
+ break;
+ case ATOM_ARG_PLL:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("PLL[0x%02X]", idx);
+ gctx->card->pll_write(gctx->card, idx, val);
+ break;
+ case ATOM_ARG_MC:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("MC[0x%02X]", idx);
+ gctx->card->mc_write(gctx->card, idx, val);
+ return;
+ }
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ DEBUG(".[31:0] <- 0x%08X\n", old_val);
+ break;
+ case ATOM_SRC_WORD0:
+ DEBUG(".[15:0] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_WORD8:
+ DEBUG(".[23:8] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_WORD16:
+ DEBUG(".[31:16] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE0:
+ DEBUG(".[7:0] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE8:
+ DEBUG(".[15:8] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE16:
+ DEBUG(".[23:16] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE24:
+ DEBUG(".[31:24] <- 0x%02X\n", old_val);
+ break;
+ }
+}
+
+static void atom_op_add(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst += src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_and(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst &= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_beep(atom_exec_context *ctx, int *ptr, int arg)
+{
+ printk("ATOM BIOS beeped!\n");
+}
+
+static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int idx = U8((*ptr)++);
+ int r = 0;
+
+ if (idx < ATOM_TABLE_NAMES_CNT)
+ SDEBUG(" table: %d (%s)\n", idx, atom_table_names[idx]);
+ else
+ SDEBUG(" table: %d\n", idx);
+ if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
+ r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
+ if (r) {
+ ctx->abort = true;
+ }
+}
+
+static void atom_op_clear(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t saved;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, 0, saved);
+}
+
+static void atom_op_compare(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->cs_equal = (dst == src);
+ ctx->ctx->cs_above = (dst > src);
+ SDEBUG(" result: %s %s\n", ctx->ctx->cs_equal ? "EQ" : "NE",
+ ctx->ctx->cs_above ? "GT" : "LE");
+}
+
+static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg)
+{
+ unsigned count = U8((*ptr)++);
+ SDEBUG(" count: %d\n", count);
+ if (arg == ATOM_UNIT_MICROSEC)
+ udelay(count);
+ else if (!drm_can_sleep())
+ mdelay(count);
+ else
+ msleep(count);
+}
+
+static void atom_op_div(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ if (src != 0) {
+ ctx->ctx->divmul[0] = dst / src;
+ ctx->ctx->divmul[1] = dst % src;
+ } else {
+ ctx->ctx->divmul[0] = 0;
+ ctx->ctx->divmul[1] = 0;
+ }
+}
+
+static void atom_op_div32(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint64_t val64;
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ if (src != 0) {
+ val64 = dst;
+ val64 |= ((uint64_t)ctx->ctx->divmul[1]) << 32;
+ do_div(val64, src);
+ ctx->ctx->divmul[0] = lower_32_bits(val64);
+ ctx->ctx->divmul[1] = upper_32_bits(val64);
+ } else {
+ ctx->ctx->divmul[0] = 0;
+ ctx->ctx->divmul[1] = 0;
+ }
+}
+
+static void atom_op_eot(atom_exec_context *ctx, int *ptr, int arg)
+{
+ /* functionally, a nop */
+}
+
+static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int execute = 0, target = U16(*ptr);
+ unsigned long cjiffies;
+
+ (*ptr) += 2;
+ switch (arg) {
+ case ATOM_COND_ABOVE:
+ execute = ctx->ctx->cs_above;
+ break;
+ case ATOM_COND_ABOVEOREQUAL:
+ execute = ctx->ctx->cs_above || ctx->ctx->cs_equal;
+ break;
+ case ATOM_COND_ALWAYS:
+ execute = 1;
+ break;
+ case ATOM_COND_BELOW:
+ execute = !(ctx->ctx->cs_above || ctx->ctx->cs_equal);
+ break;
+ case ATOM_COND_BELOWOREQUAL:
+ execute = !ctx->ctx->cs_above;
+ break;
+ case ATOM_COND_EQUAL:
+ execute = ctx->ctx->cs_equal;
+ break;
+ case ATOM_COND_NOTEQUAL:
+ execute = !ctx->ctx->cs_equal;
+ break;
+ }
+ if (arg != ATOM_COND_ALWAYS)
+ SDEBUG(" taken: %s\n", str_yes_no(execute));
+ SDEBUG(" target: 0x%04X\n", target);
+ if (execute) {
+ if (ctx->last_jump == (ctx->start + target)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, ctx->last_jump_jiffies)) {
+ cjiffies -= ctx->last_jump_jiffies;
+ if ((jiffies_to_msecs(cjiffies) > ATOM_CMD_TIMEOUT_SEC*1000)) {
+ DRM_ERROR("atombios stuck in loop for more than %dsecs aborting\n",
+ ATOM_CMD_TIMEOUT_SEC);
+ ctx->abort = true;
+ }
+ } else {
+ /* jiffies wrap around we will just wait a little longer */
+ ctx->last_jump_jiffies = jiffies;
+ }
+ } else {
+ ctx->last_jump = ctx->start + target;
+ ctx->last_jump_jiffies = jiffies;
+ }
+ *ptr = ctx->start + target;
+ }
+}
+
+static void atom_op_mask(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, mask, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ mask = atom_get_src_direct(ctx, ((attr >> 3) & 7), ptr);
+ SDEBUG(" mask: 0x%08x", mask);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst &= mask;
+ dst |= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_move(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t src, saved;
+ int dptr = *ptr;
+ if (((attr >> 3) & 7) != ATOM_SRC_DWORD)
+ atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+ else {
+ atom_skip_dst(ctx, arg, attr, ptr);
+ saved = 0xCDCDCDCD;
+ }
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, src, saved);
+}
+
+static void atom_op_mul(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->divmul[0] = dst * src;
+}
+
+static void atom_op_mul32(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint64_t val64;
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ val64 = (uint64_t)dst * (uint64_t)src;
+ ctx->ctx->divmul[0] = lower_32_bits(val64);
+ ctx->ctx->divmul[1] = upper_32_bits(val64);
+}
+
+static void atom_op_nop(atom_exec_context *ctx, int *ptr, int arg)
+{
+ /* nothing */
+}
+
+static void atom_op_or(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst |= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_postcard(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t val = U8((*ptr)++);
+ SDEBUG("POST card output: 0x%02X\n", val);
+}
+
+static void atom_op_repeat(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_restorereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_savereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_setdatablock(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int idx = U8(*ptr);
+ (*ptr)++;
+ SDEBUG(" block: %d\n", idx);
+ if (!idx)
+ ctx->ctx->data_block = 0;
+ else if (idx == 255)
+ ctx->ctx->data_block = ctx->start;
+ else
+ ctx->ctx->data_block = U16(ctx->ctx->data_table + 4 + 2 * idx);
+ SDEBUG(" base: 0x%04X\n", ctx->ctx->data_block);
+}
+
+static void atom_op_setfbbase(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ SDEBUG(" fb_base: ");
+ ctx->ctx->fb_base = atom_get_src(ctx, attr, ptr);
+}
+
+static void atom_op_setport(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int port;
+ switch (arg) {
+ case ATOM_PORT_ATI:
+ port = U16(*ptr);
+ if (port < ATOM_IO_NAMES_CNT)
+ SDEBUG(" port: %d (%s)\n", port, atom_io_names[port]);
+ else
+ SDEBUG(" port: %d\n", port);
+ if (!port)
+ ctx->ctx->io_mode = ATOM_IO_MM;
+ else
+ ctx->ctx->io_mode = ATOM_IO_IIO | port;
+ (*ptr) += 2;
+ break;
+ case ATOM_PORT_PCI:
+ ctx->ctx->io_mode = ATOM_IO_PCI;
+ (*ptr)++;
+ break;
+ case ATOM_PORT_SYSIO:
+ ctx->ctx->io_mode = ATOM_IO_SYSIO;
+ (*ptr)++;
+ break;
+ }
+}
+
+static void atom_op_setregblock(atom_exec_context *ctx, int *ptr, int arg)
+{
+ ctx->ctx->reg_block = U16(*ptr);
+ (*ptr) += 2;
+ SDEBUG(" base: 0x%04X\n", ctx->ctx->reg_block);
+}
+
+static void atom_op_shift_left(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ shift = atom_get_src_direct(ctx, ATOM_SRC_BYTE0, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst <<= shift;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shift_right(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ shift = atom_get_src_direct(ctx, ATOM_SRC_BYTE0, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst >>= shift;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shl(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3];
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ /* op needs to full dst value */
+ dst = saved;
+ shift = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst <<= shift;
+ dst &= atom_arg_mask[dst_align];
+ dst >>= atom_arg_shift[dst_align];
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shr(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3];
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ /* op needs to full dst value */
+ dst = saved;
+ shift = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst >>= shift;
+ dst &= atom_arg_mask[dst_align];
+ dst >>= atom_arg_shift[dst_align];
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_sub(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst -= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_switch(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t src, val, target;
+ SDEBUG(" switch: ");
+ src = atom_get_src(ctx, attr, ptr);
+ while (U16(*ptr) != ATOM_CASE_END)
+ if (U8(*ptr) == ATOM_CASE_MAGIC) {
+ (*ptr)++;
+ SDEBUG(" case: ");
+ val =
+ atom_get_src(ctx, (attr & 0x38) | ATOM_ARG_IMM,
+ ptr);
+ target = U16(*ptr);
+ if (val == src) {
+ SDEBUG(" target: %04X\n", target);
+ *ptr = ctx->start + target;
+ return;
+ }
+ (*ptr) += 2;
+ } else {
+ pr_info("Bad case\n");
+ return;
+ }
+ (*ptr) += 2;
+}
+
+static void atom_op_test(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->cs_equal = ((dst & src) == 0);
+ SDEBUG(" result: %s\n", ctx->ctx->cs_equal ? "EQ" : "NE");
+}
+
+static void atom_op_xor(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst ^= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_debug(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t val = U8((*ptr)++);
+ SDEBUG("DEBUG output: 0x%02X\n", val);
+}
+
+static void atom_op_processds(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint16_t val = U16(*ptr);
+ (*ptr) += val + 2;
+ SDEBUG("PROCESSDS output: 0x%02X\n", val);
+}
+
+static struct {
+ void (*func) (atom_exec_context *, int *, int);
+ int arg;
+} opcode_table[ATOM_OP_CNT] = {
+ {
+ NULL, 0}, {
+ atom_op_move, ATOM_ARG_REG}, {
+ atom_op_move, ATOM_ARG_PS}, {
+ atom_op_move, ATOM_ARG_WS}, {
+ atom_op_move, ATOM_ARG_FB}, {
+ atom_op_move, ATOM_ARG_PLL}, {
+ atom_op_move, ATOM_ARG_MC}, {
+ atom_op_and, ATOM_ARG_REG}, {
+ atom_op_and, ATOM_ARG_PS}, {
+ atom_op_and, ATOM_ARG_WS}, {
+ atom_op_and, ATOM_ARG_FB}, {
+ atom_op_and, ATOM_ARG_PLL}, {
+ atom_op_and, ATOM_ARG_MC}, {
+ atom_op_or, ATOM_ARG_REG}, {
+ atom_op_or, ATOM_ARG_PS}, {
+ atom_op_or, ATOM_ARG_WS}, {
+ atom_op_or, ATOM_ARG_FB}, {
+ atom_op_or, ATOM_ARG_PLL}, {
+ atom_op_or, ATOM_ARG_MC}, {
+ atom_op_shift_left, ATOM_ARG_REG}, {
+ atom_op_shift_left, ATOM_ARG_PS}, {
+ atom_op_shift_left, ATOM_ARG_WS}, {
+ atom_op_shift_left, ATOM_ARG_FB}, {
+ atom_op_shift_left, ATOM_ARG_PLL}, {
+ atom_op_shift_left, ATOM_ARG_MC}, {
+ atom_op_shift_right, ATOM_ARG_REG}, {
+ atom_op_shift_right, ATOM_ARG_PS}, {
+ atom_op_shift_right, ATOM_ARG_WS}, {
+ atom_op_shift_right, ATOM_ARG_FB}, {
+ atom_op_shift_right, ATOM_ARG_PLL}, {
+ atom_op_shift_right, ATOM_ARG_MC}, {
+ atom_op_mul, ATOM_ARG_REG}, {
+ atom_op_mul, ATOM_ARG_PS}, {
+ atom_op_mul, ATOM_ARG_WS}, {
+ atom_op_mul, ATOM_ARG_FB}, {
+ atom_op_mul, ATOM_ARG_PLL}, {
+ atom_op_mul, ATOM_ARG_MC}, {
+ atom_op_div, ATOM_ARG_REG}, {
+ atom_op_div, ATOM_ARG_PS}, {
+ atom_op_div, ATOM_ARG_WS}, {
+ atom_op_div, ATOM_ARG_FB}, {
+ atom_op_div, ATOM_ARG_PLL}, {
+ atom_op_div, ATOM_ARG_MC}, {
+ atom_op_add, ATOM_ARG_REG}, {
+ atom_op_add, ATOM_ARG_PS}, {
+ atom_op_add, ATOM_ARG_WS}, {
+ atom_op_add, ATOM_ARG_FB}, {
+ atom_op_add, ATOM_ARG_PLL}, {
+ atom_op_add, ATOM_ARG_MC}, {
+ atom_op_sub, ATOM_ARG_REG}, {
+ atom_op_sub, ATOM_ARG_PS}, {
+ atom_op_sub, ATOM_ARG_WS}, {
+ atom_op_sub, ATOM_ARG_FB}, {
+ atom_op_sub, ATOM_ARG_PLL}, {
+ atom_op_sub, ATOM_ARG_MC}, {
+ atom_op_setport, ATOM_PORT_ATI}, {
+ atom_op_setport, ATOM_PORT_PCI}, {
+ atom_op_setport, ATOM_PORT_SYSIO}, {
+ atom_op_setregblock, 0}, {
+ atom_op_setfbbase, 0}, {
+ atom_op_compare, ATOM_ARG_REG}, {
+ atom_op_compare, ATOM_ARG_PS}, {
+ atom_op_compare, ATOM_ARG_WS}, {
+ atom_op_compare, ATOM_ARG_FB}, {
+ atom_op_compare, ATOM_ARG_PLL}, {
+ atom_op_compare, ATOM_ARG_MC}, {
+ atom_op_switch, 0}, {
+ atom_op_jump, ATOM_COND_ALWAYS}, {
+ atom_op_jump, ATOM_COND_EQUAL}, {
+ atom_op_jump, ATOM_COND_BELOW}, {
+ atom_op_jump, ATOM_COND_ABOVE}, {
+ atom_op_jump, ATOM_COND_BELOWOREQUAL}, {
+ atom_op_jump, ATOM_COND_ABOVEOREQUAL}, {
+ atom_op_jump, ATOM_COND_NOTEQUAL}, {
+ atom_op_test, ATOM_ARG_REG}, {
+ atom_op_test, ATOM_ARG_PS}, {
+ atom_op_test, ATOM_ARG_WS}, {
+ atom_op_test, ATOM_ARG_FB}, {
+ atom_op_test, ATOM_ARG_PLL}, {
+ atom_op_test, ATOM_ARG_MC}, {
+ atom_op_delay, ATOM_UNIT_MILLISEC}, {
+ atom_op_delay, ATOM_UNIT_MICROSEC}, {
+ atom_op_calltable, 0}, {
+ atom_op_repeat, 0}, {
+ atom_op_clear, ATOM_ARG_REG}, {
+ atom_op_clear, ATOM_ARG_PS}, {
+ atom_op_clear, ATOM_ARG_WS}, {
+ atom_op_clear, ATOM_ARG_FB}, {
+ atom_op_clear, ATOM_ARG_PLL}, {
+ atom_op_clear, ATOM_ARG_MC}, {
+ atom_op_nop, 0}, {
+ atom_op_eot, 0}, {
+ atom_op_mask, ATOM_ARG_REG}, {
+ atom_op_mask, ATOM_ARG_PS}, {
+ atom_op_mask, ATOM_ARG_WS}, {
+ atom_op_mask, ATOM_ARG_FB}, {
+ atom_op_mask, ATOM_ARG_PLL}, {
+ atom_op_mask, ATOM_ARG_MC}, {
+ atom_op_postcard, 0}, {
+ atom_op_beep, 0}, {
+ atom_op_savereg, 0}, {
+ atom_op_restorereg, 0}, {
+ atom_op_setdatablock, 0}, {
+ atom_op_xor, ATOM_ARG_REG}, {
+ atom_op_xor, ATOM_ARG_PS}, {
+ atom_op_xor, ATOM_ARG_WS}, {
+ atom_op_xor, ATOM_ARG_FB}, {
+ atom_op_xor, ATOM_ARG_PLL}, {
+ atom_op_xor, ATOM_ARG_MC}, {
+ atom_op_shl, ATOM_ARG_REG}, {
+ atom_op_shl, ATOM_ARG_PS}, {
+ atom_op_shl, ATOM_ARG_WS}, {
+ atom_op_shl, ATOM_ARG_FB}, {
+ atom_op_shl, ATOM_ARG_PLL}, {
+ atom_op_shl, ATOM_ARG_MC}, {
+ atom_op_shr, ATOM_ARG_REG}, {
+ atom_op_shr, ATOM_ARG_PS}, {
+ atom_op_shr, ATOM_ARG_WS}, {
+ atom_op_shr, ATOM_ARG_FB}, {
+ atom_op_shr, ATOM_ARG_PLL}, {
+ atom_op_shr, ATOM_ARG_MC}, {
+ atom_op_debug, 0}, {
+ atom_op_processds, 0}, {
+ atom_op_mul32, ATOM_ARG_PS}, {
+ atom_op_mul32, ATOM_ARG_WS}, {
+ atom_op_div32, ATOM_ARG_PS}, {
+ atom_op_div32, ATOM_ARG_WS},
+};
+
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)
+{
+ int base = CU16(ctx->cmd_table + 4 + 2 * index);
+ int len, ws, ps, ptr;
+ unsigned char op;
+ atom_exec_context ectx;
+ int ret = 0;
+
+ if (!base)
+ return -EINVAL;
+
+ len = CU16(base + ATOM_CT_SIZE_PTR);
+ ws = CU8(base + ATOM_CT_WS_PTR);
+ ps = CU8(base + ATOM_CT_PS_PTR) & ATOM_CT_PS_MASK;
+ ptr = base + ATOM_CT_CODE_PTR;
+
+ SDEBUG(">> execute %04X (len %d, WS %d, PS %d)\n", base, len, ws, ps);
+
+ ectx.ctx = ctx;
+ ectx.ps_shift = ps / 4;
+ ectx.start = base;
+ ectx.ps = params;
+ ectx.abort = false;
+ ectx.last_jump = 0;
+ if (ws)
+ ectx.ws = kcalloc(4, ws, GFP_KERNEL);
+ else
+ ectx.ws = NULL;
+
+ debug_depth++;
+ while (1) {
+ op = CU8(ptr++);
+ if (op < ATOM_OP_NAMES_CNT)
+ SDEBUG("%s @ 0x%04X\n", atom_op_names[op], ptr - 1);
+ else
+ SDEBUG("[%d] @ 0x%04X\n", op, ptr - 1);
+ if (ectx.abort) {
+ DRM_ERROR("atombios stuck executing %04X (len %d, WS %d, PS %d) @ 0x%04X\n",
+ base, len, ws, ps, ptr - 1);
+ ret = -EINVAL;
+ goto free;
+ }
+
+ if (op < ATOM_OP_CNT && op > 0)
+ opcode_table[op].func(&ectx, &ptr,
+ opcode_table[op].arg);
+ else
+ break;
+
+ if (op == ATOM_OP_EOT)
+ break;
+ }
+ debug_depth--;
+ SDEBUG("<<\n");
+
+free:
+ if (ws)
+ kfree(ectx.ws);
+ return ret;
+}
+
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)
+{
+ int r;
+
+ mutex_lock(&ctx->mutex);
+ /* reset data block */
+ ctx->data_block = 0;
+ /* reset reg block */
+ ctx->reg_block = 0;
+ /* reset fb window */
+ ctx->fb_base = 0;
+ /* reset io mode */
+ ctx->io_mode = ATOM_IO_MM;
+ /* reset divmul */
+ ctx->divmul[0] = 0;
+ ctx->divmul[1] = 0;
+ r = amdgpu_atom_execute_table_locked(ctx, index, params);
+ mutex_unlock(&ctx->mutex);
+ return r;
+}
+
+static int atom_iio_len[] = { 1, 2, 3, 3, 3, 3, 4, 4, 4, 3 };
+
+static void atom_index_iio(struct atom_context *ctx, int base)
+{
+ ctx->iio = kzalloc(2 * 256, GFP_KERNEL);
+ if (!ctx->iio)
+ return;
+ while (CU8(base) == ATOM_IIO_START) {
+ ctx->iio[CU8(base + 1)] = base + 2;
+ base += 2;
+ while (CU8(base) != ATOM_IIO_END)
+ base += atom_iio_len[CU8(base)];
+ base += 3;
+ }
+}
+
+static void atom_get_vbios_name(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char str_num;
+ unsigned short off_to_vbios_str;
+ unsigned char *c_ptr;
+ int name_size;
+ int i;
+
+ const char *na = "--N/A--";
+ char *back;
+
+ p_rom = ctx->bios;
+
+ str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS);
+ if (str_num != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ c_ptr = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ /* do not know where to find name */
+ memcpy(ctx->name, na, 7);
+ ctx->name[7] = 0;
+ return;
+ }
+
+ /*
+ * skip the atombios strings, usually 4
+ * 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type
+ */
+ for (i = 0; i < str_num; i++) {
+ while (*c_ptr != 0)
+ c_ptr++;
+ c_ptr++;
+ }
+
+ /* skip the following 2 chars: 0x0D 0x0A */
+ c_ptr += 2;
+
+ name_size = strnlen(c_ptr, STRLEN_LONG - 1);
+ memcpy(ctx->name, c_ptr, name_size);
+ back = ctx->name + name_size;
+ while ((*--back) == ' ')
+ ;
+ *(back + 1) = '\0';
+}
+
+static void atom_get_vbios_date(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char *date_in_rom;
+
+ p_rom = ctx->bios;
+
+ date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE;
+
+ ctx->date[0] = '2';
+ ctx->date[1] = '0';
+ ctx->date[2] = date_in_rom[6];
+ ctx->date[3] = date_in_rom[7];
+ ctx->date[4] = '/';
+ ctx->date[5] = date_in_rom[0];
+ ctx->date[6] = date_in_rom[1];
+ ctx->date[7] = '/';
+ ctx->date[8] = date_in_rom[3];
+ ctx->date[9] = date_in_rom[4];
+ ctx->date[10] = ' ';
+ ctx->date[11] = date_in_rom[9];
+ ctx->date[12] = date_in_rom[10];
+ ctx->date[13] = date_in_rom[11];
+ ctx->date[14] = date_in_rom[12];
+ ctx->date[15] = date_in_rom[13];
+ ctx->date[16] = '\0';
+}
+
+static unsigned char *atom_find_str_in_rom(struct atom_context *ctx, char *str, int start,
+ int end, int maxlen)
+{
+ unsigned long str_off;
+ unsigned char *p_rom;
+ unsigned short str_len;
+
+ str_off = 0;
+ str_len = strnlen(str, maxlen);
+ p_rom = ctx->bios;
+
+ for (; start <= end; ++start) {
+ for (str_off = 0; str_off < str_len; ++str_off) {
+ if (str[str_off] != *(p_rom + start + str_off))
+ break;
+ }
+
+ if (str_off == str_len || str[str_off] == 0)
+ return p_rom + start;
+ }
+ return NULL;
+}
+
+static void atom_get_vbios_pn(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned short off_to_vbios_str;
+ unsigned char *vbios_str;
+ int count;
+
+ off_to_vbios_str = 0;
+ p_rom = ctx->bios;
+
+ if (*(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS) != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ vbios_str = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ vbios_str = p_rom + OFFSET_TO_VBIOS_PART_NUMBER;
+ }
+
+ if (*vbios_str == 0) {
+ vbios_str = atom_find_str_in_rom(ctx, BIOS_ATOM_PREFIX, 3, 1024, 64);
+ if (vbios_str == NULL)
+ vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
+ }
+ if (vbios_str != NULL && *vbios_str == 0)
+ vbios_str++;
+
+ if (vbios_str != NULL) {
+ count = 0;
+ while ((count < BIOS_STRING_LENGTH) && vbios_str[count] >= ' ' &&
+ vbios_str[count] <= 'z') {
+ ctx->vbios_pn[count] = vbios_str[count];
+ count++;
+ }
+
+ ctx->vbios_pn[count] = 0;
+ }
+
+ pr_info("ATOM BIOS: %s\n", ctx->vbios_pn);
+}
+
+static void atom_get_vbios_version(struct atom_context *ctx)
+{
+ unsigned char *vbios_ver;
+
+ /* find anchor ATOMBIOSBK-AMD */
+ vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64);
+ if (vbios_ver != NULL) {
+ /* skip ATOMBIOSBK-AMD VER */
+ vbios_ver += 18;
+ memcpy(ctx->vbios_ver_str, vbios_ver, STRLEN_NORMAL);
+ } else {
+ ctx->vbios_ver_str[0] = '\0';
+ }
+}
+
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
+{
+ int base;
+ struct atom_context *ctx =
+ kzalloc(sizeof(struct atom_context), GFP_KERNEL);
+ struct _ATOM_ROM_HEADER *atom_rom_header;
+ struct _ATOM_MASTER_DATA_TABLE *master_table;
+ struct _ATOM_FIRMWARE_INFO *atom_fw_info;
+
+ if (!ctx)
+ return NULL;
+
+ ctx->card = card;
+ ctx->bios = bios;
+
+ if (CU16(0) != ATOM_BIOS_MAGIC) {
+ pr_info("Invalid BIOS magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+ if (strncmp
+ (CSTR(ATOM_ATI_MAGIC_PTR), ATOM_ATI_MAGIC,
+ strlen(ATOM_ATI_MAGIC))) {
+ pr_info("Invalid ATI magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+
+ base = CU16(ATOM_ROM_TABLE_PTR);
+ if (strncmp
+ (CSTR(base + ATOM_ROM_MAGIC_PTR), ATOM_ROM_MAGIC,
+ strlen(ATOM_ROM_MAGIC))) {
+ pr_info("Invalid ATOM magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+
+ ctx->cmd_table = CU16(base + ATOM_ROM_CMD_PTR);
+ ctx->data_table = CU16(base + ATOM_ROM_DATA_PTR);
+ atom_index_iio(ctx, CU16(ctx->data_table + ATOM_DATA_IIO_PTR) + 4);
+ if (!ctx->iio) {
+ amdgpu_atom_destroy(ctx);
+ return NULL;
+ }
+
+ atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base);
+ if (atom_rom_header->usMasterDataTableOffset != 0) {
+ master_table = (struct _ATOM_MASTER_DATA_TABLE *)
+ CSTR(atom_rom_header->usMasterDataTableOffset);
+ if (master_table->ListOfDataTables.FirmwareInfo != 0) {
+ atom_fw_info = (struct _ATOM_FIRMWARE_INFO *)
+ CSTR(master_table->ListOfDataTables.FirmwareInfo);
+ ctx->version = atom_fw_info->ulFirmwareRevision;
+ }
+ }
+
+ atom_get_vbios_name(ctx);
+ atom_get_vbios_pn(ctx);
+ atom_get_vbios_date(ctx);
+ atom_get_vbios_version(ctx);
+
+ return ctx;
+}
+
+int amdgpu_atom_asic_init(struct atom_context *ctx)
+{
+ int hwi = CU16(ctx->data_table + ATOM_DATA_FWI_PTR);
+ uint32_t ps[16];
+ int ret;
+
+ memset(ps, 0, 64);
+
+ ps[0] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFSCLK_PTR));
+ ps[1] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFMCLK_PTR));
+ if (!ps[0] || !ps[1])
+ return 1;
+
+ if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
+ return 1;
+ ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps);
+ if (ret)
+ return ret;
+
+ memset(ps, 0, 64);
+
+ return ret;
+}
+
+void amdgpu_atom_destroy(struct atom_context *ctx)
+{
+ kfree(ctx->iio);
+ kfree(ctx);
+}
+
+bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index,
+ uint16_t *size, uint8_t *frev, uint8_t *crev,
+ uint16_t *data_start)
+{
+ int offset = index * 2 + 4;
+ int idx = CU16(ctx->data_table + offset);
+ u16 *mdt = (u16 *)(ctx->bios + ctx->data_table + 4);
+
+ if (!mdt[index])
+ return false;
+
+ if (size)
+ *size = CU16(idx);
+ if (frev)
+ *frev = CU8(idx + 2);
+ if (crev)
+ *crev = CU8(idx + 3);
+ *data_start = idx;
+ return true;
+}
+
+bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev,
+ uint8_t *crev)
+{
+ int offset = index * 2 + 4;
+ int idx = CU16(ctx->cmd_table + offset);
+ u16 *mct = (u16 *)(ctx->bios + ctx->cmd_table + 4);
+
+ if (!mct[index])
+ return false;
+
+ if (frev)
+ *frev = CU8(idx + 2);
+ if (crev)
+ *crev = CU8(idx + 3);
+ return true;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
new file mode 100644
index 000000000..c11cf18a0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#ifndef ATOM_H
+#define ATOM_H
+
+#include <linux/types.h>
+
+struct drm_device;
+
+#define ATOM_BIOS_MAGIC 0xAA55
+#define ATOM_ATI_MAGIC_PTR 0x30
+#define ATOM_ATI_MAGIC " 761295520"
+#define ATOM_ROM_TABLE_PTR 0x48
+
+#define ATOM_ROM_MAGIC "ATOM"
+#define ATOM_ROM_MAGIC_PTR 4
+
+#define ATOM_ROM_MSG_PTR 0x10
+#define ATOM_ROM_CMD_PTR 0x1E
+#define ATOM_ROM_DATA_PTR 0x20
+
+#define ATOM_CMD_INIT 0
+#define ATOM_CMD_SETSCLK 0x0A
+#define ATOM_CMD_SETMCLK 0x0B
+#define ATOM_CMD_SETPCLK 0x0C
+#define ATOM_CMD_SPDFANCNTL 0x39
+
+#define ATOM_DATA_FWI_PTR 0xC
+#define ATOM_DATA_IIO_PTR 0x32
+
+#define ATOM_FWI_DEFSCLK_PTR 8
+#define ATOM_FWI_DEFMCLK_PTR 0xC
+#define ATOM_FWI_MAXSCLK_PTR 0x24
+#define ATOM_FWI_MAXMCLK_PTR 0x28
+
+#define ATOM_CT_SIZE_PTR 0
+#define ATOM_CT_WS_PTR 4
+#define ATOM_CT_PS_PTR 5
+#define ATOM_CT_PS_MASK 0x7F
+#define ATOM_CT_CODE_PTR 6
+
+#define ATOM_OP_CNT 127
+#define ATOM_OP_EOT 91
+
+#define ATOM_CASE_MAGIC 0x63
+#define ATOM_CASE_END 0x5A5A
+
+#define ATOM_ARG_REG 0
+#define ATOM_ARG_PS 1
+#define ATOM_ARG_WS 2
+#define ATOM_ARG_FB 3
+#define ATOM_ARG_ID 4
+#define ATOM_ARG_IMM 5
+#define ATOM_ARG_PLL 6
+#define ATOM_ARG_MC 7
+
+#define ATOM_SRC_DWORD 0
+#define ATOM_SRC_WORD0 1
+#define ATOM_SRC_WORD8 2
+#define ATOM_SRC_WORD16 3
+#define ATOM_SRC_BYTE0 4
+#define ATOM_SRC_BYTE8 5
+#define ATOM_SRC_BYTE16 6
+#define ATOM_SRC_BYTE24 7
+
+#define ATOM_WS_QUOTIENT 0x40
+#define ATOM_WS_REMAINDER 0x41
+#define ATOM_WS_DATAPTR 0x42
+#define ATOM_WS_SHIFT 0x43
+#define ATOM_WS_OR_MASK 0x44
+#define ATOM_WS_AND_MASK 0x45
+#define ATOM_WS_FB_WINDOW 0x46
+#define ATOM_WS_ATTRIBUTES 0x47
+#define ATOM_WS_REGPTR 0x48
+
+#define ATOM_IIO_NOP 0
+#define ATOM_IIO_START 1
+#define ATOM_IIO_READ 2
+#define ATOM_IIO_WRITE 3
+#define ATOM_IIO_CLEAR 4
+#define ATOM_IIO_SET 5
+#define ATOM_IIO_MOVE_INDEX 6
+#define ATOM_IIO_MOVE_ATTR 7
+#define ATOM_IIO_MOVE_DATA 8
+#define ATOM_IIO_END 9
+
+#define ATOM_IO_MM 0
+#define ATOM_IO_PCI 1
+#define ATOM_IO_SYSIO 2
+#define ATOM_IO_IIO 0x80
+
+#define STRLEN_NORMAL 32
+#define STRLEN_LONG 64
+#define STRLEN_VERYLONG 254
+
+struct card_info {
+ struct drm_device *dev;
+ void (*reg_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*reg_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*mc_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*mc_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*pll_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*pll_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+};
+
+struct atom_context {
+ struct card_info *card;
+ struct mutex mutex;
+ void *bios;
+ uint32_t cmd_table, data_table;
+ uint16_t *iio;
+
+ uint16_t data_block;
+ uint32_t fb_base;
+ uint32_t divmul[2];
+ uint16_t io_attr;
+ uint16_t reg_block;
+ uint8_t shift;
+ int cs_equal, cs_above;
+ int io_mode;
+ uint32_t *scratch;
+ int scratch_size_bytes;
+
+ uint8_t name[STRLEN_LONG];
+ uint8_t vbios_pn[STRLEN_LONG];
+ uint32_t version;
+ uint8_t vbios_ver_str[STRLEN_NORMAL];
+ uint8_t date[STRLEN_NORMAL];
+};
+
+extern int amdgpu_atom_debug;
+
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+int amdgpu_atom_asic_init(struct atom_context *ctx);
+void amdgpu_atom_destroy(struct atom_context *ctx);
+bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
+ uint8_t *frev, uint8_t *crev, uint16_t *data_start);
+bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
+ uint8_t *frev, uint8_t *crev);
+#include "atom-types.h"
+#include "atombios.h"
+#include "ObjectID.h"
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
new file mode 100644
index 000000000..10098fdd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_fixed.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "atombios_crtc.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+
+void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ SET_CRTC_OVERSCAN_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
+ int a1, a2;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+
+ switch (amdgpu_crtc->rmx_type) {
+ case RMX_CENTER:
+ args.usOverscanTop = cpu_to_le16((adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2);
+ args.usOverscanBottom = cpu_to_le16((adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2);
+ args.usOverscanLeft = cpu_to_le16((adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2);
+ args.usOverscanRight = cpu_to_le16((adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2);
+ break;
+ case RMX_ASPECT:
+ a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
+ a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
+
+ if (a1 > a2) {
+ args.usOverscanLeft = cpu_to_le16((adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2);
+ args.usOverscanRight = cpu_to_le16((adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2);
+ } else if (a2 > a1) {
+ args.usOverscanTop = cpu_to_le16((adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2);
+ args.usOverscanBottom = cpu_to_le16((adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2);
+ }
+ break;
+ case RMX_FULL:
+ default:
+ args.usOverscanRight = cpu_to_le16(amdgpu_crtc->h_border);
+ args.usOverscanLeft = cpu_to_le16(amdgpu_crtc->h_border);
+ args.usOverscanBottom = cpu_to_le16(amdgpu_crtc->v_border);
+ args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
+ break;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ ENABLE_SCALER_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucScaler = amdgpu_crtc->crtc_id;
+
+ switch (amdgpu_crtc->rmx_type) {
+ case RMX_FULL:
+ args.ucEnable = ATOM_SCALER_EXPANSION;
+ break;
+ case RMX_CENTER:
+ args.ucEnable = ATOM_SCALER_CENTER;
+ break;
+ case RMX_ASPECT:
+ args.ucEnable = ATOM_SCALER_EXPANSION;
+ break;
+ default:
+ args.ucEnable = ATOM_SCALER_DISABLE;
+ break;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index =
+ GetIndexIntoMasterTable(COMMAND, UpdateCRTC_DoubleBufferRegisters);
+ ENABLE_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucEnable = lock;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, EnableCRTC);
+ ENABLE_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucEnable = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, BlankCRTC);
+ BLANK_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucBlanking = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucDispPipeId = amdgpu_crtc->crtc_id;
+ args.ucEnable = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
+{
+ int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucEnable = ATOM_INIT;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ SET_CRTC_USING_DTD_TIMING_PARAMETERS args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_UsingDTDTiming);
+ u16 misc = 0;
+
+ memset(&args, 0, sizeof(args));
+ args.usH_Size = cpu_to_le16(mode->crtc_hdisplay - (amdgpu_crtc->h_border * 2));
+ args.usH_Blanking_Time =
+ cpu_to_le16(mode->crtc_hblank_end - mode->crtc_hdisplay + (amdgpu_crtc->h_border * 2));
+ args.usV_Size = cpu_to_le16(mode->crtc_vdisplay - (amdgpu_crtc->v_border * 2));
+ args.usV_Blanking_Time =
+ cpu_to_le16(mode->crtc_vblank_end - mode->crtc_vdisplay + (amdgpu_crtc->v_border * 2));
+ args.usH_SyncOffset =
+ cpu_to_le16(mode->crtc_hsync_start - mode->crtc_hdisplay + amdgpu_crtc->h_border);
+ args.usH_SyncWidth =
+ cpu_to_le16(mode->crtc_hsync_end - mode->crtc_hsync_start);
+ args.usV_SyncOffset =
+ cpu_to_le16(mode->crtc_vsync_start - mode->crtc_vdisplay + amdgpu_crtc->v_border);
+ args.usV_SyncWidth =
+ cpu_to_le16(mode->crtc_vsync_end - mode->crtc_vsync_start);
+ args.ucH_Border = amdgpu_crtc->h_border;
+ args.ucV_Border = amdgpu_crtc->v_border;
+
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ misc |= ATOM_VSYNC_POLARITY;
+ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+ misc |= ATOM_HSYNC_POLARITY;
+ if (mode->flags & DRM_MODE_FLAG_CSYNC)
+ misc |= ATOM_COMPOSITESYNC;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ misc |= ATOM_INTERLACE;
+ if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+ misc |= ATOM_DOUBLE_CLOCK_MODE;
+
+ args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+union atom_enable_ss {
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION v1;
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2 v2;
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_V3 v3;
+};
+
+static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
+ int enable,
+ int pll_id,
+ int crtc_id,
+ struct amdgpu_atom_ss *ss)
+{
+ unsigned i;
+ int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL);
+ union atom_enable_ss args;
+
+ if (enable) {
+ /* Don't mess with SS if percentage is 0 or external ss.
+ * SS is already disabled previously, and disabling it
+ * again can cause display problems if the pll is already
+ * programmed.
+ */
+ if (ss->percentage == 0)
+ return;
+ if (ss->type & ATOM_EXTERNAL_SS_MASK)
+ return;
+ } else {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != crtc_id &&
+ pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll don't turn
+ * off spread spectrum as it might turn off
+ * display on active crtc
+ */
+ return;
+ }
+ }
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ args.v3.usSpreadSpectrumAmountFrac = cpu_to_le16(0);
+ args.v3.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
+ switch (pll_id) {
+ case ATOM_PPLL1:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL;
+ break;
+ case ATOM_PPLL2:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P2PLL;
+ break;
+ case ATOM_DCPLL:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_DCPLL;
+ break;
+ case ATOM_PPLL_INVALID:
+ return;
+ }
+ args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
+ args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
+ args.v3.ucEnable = enable;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+union adjust_pixel_clock {
+ ADJUST_DISPLAY_PLL_PS_ALLOCATION v1;
+ ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3 v3;
+};
+
+static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_encoder *encoder = amdgpu_crtc->encoder;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u32 adjusted_clock = mode->clock;
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ u32 dp_clock = mode->clock;
+ u32 clock = mode->clock;
+ int bpc = amdgpu_crtc->bpc;
+ bool is_duallink = amdgpu_dig_monitor_is_duallink(encoder, mode->clock);
+ union adjust_pixel_clock args;
+ u8 frev, crev;
+ int index;
+
+ amdgpu_crtc->pll_flags = AMDGPU_PLL_USE_FRAC_FB_DIV;
+
+ if ((amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)) {
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ }
+ }
+
+ /* use recommended ref_div for ss */
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ if (amdgpu_crtc->ss_enabled) {
+ if (amdgpu_crtc->ss.refdiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_REF_DIV;
+ amdgpu_crtc->pll_reference_div = amdgpu_crtc->ss.refdiv;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ }
+ }
+ }
+
+ /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
+ if (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
+ adjusted_clock = mode->clock * 2;
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_PREFER_CLOSEST_LOWER;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_IS_LCD;
+
+
+ /* adjust pll for deep color modes */
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ break;
+ case 10:
+ clock = (clock * 5) / 4;
+ break;
+ case 12:
+ clock = (clock * 3) / 2;
+ break;
+ case 16:
+ clock = clock * 2;
+ break;
+ }
+ }
+
+ /* DCE3+ has an AdjustDisplayPll that will adjust the pixel clock
+ * accordingly based on the encoder/transmitter to work around
+ * special hw requirements.
+ */
+ index = GetIndexIntoMasterTable(COMMAND, AdjustDisplayPll);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return adjusted_clock;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ case 2:
+ args.v1.usPixelClock = cpu_to_le16(clock / 10);
+ args.v1.ucTransmitterID = amdgpu_encoder->encoder_id;
+ args.v1.ucEncodeMode = encoder_mode;
+ if (amdgpu_crtc->ss_enabled && amdgpu_crtc->ss.percentage)
+ args.v1.ucConfig |=
+ ADJUST_DISPLAY_CONFIG_SS_ENABLE;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args);
+ adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
+ break;
+ case 3:
+ args.v3.sInput.usPixelClock = cpu_to_le16(clock / 10);
+ args.v3.sInput.ucTransmitterID = amdgpu_encoder->encoder_id;
+ args.v3.sInput.ucEncodeMode = encoder_mode;
+ args.v3.sInput.ucDispPllConfig = 0;
+ if (amdgpu_crtc->ss_enabled && amdgpu_crtc->ss.percentage)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_SS_ENABLE;
+ if (ENCODER_MODE_IS_DP(encoder_mode)) {
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_COHERENT_MODE;
+ /* 16200 or 27000 */
+ args.v3.sInput.usPixelClock = cpu_to_le16(dp_clock / 10);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig->coherent_mode)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_COHERENT_MODE;
+ if (is_duallink)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_DUAL_LINK;
+ }
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)
+ args.v3.sInput.ucExtTransmitterID =
+ amdgpu_encoder_get_dp_bridge_encoder_id(encoder);
+ else
+ args.v3.sInput.ucExtTransmitterID = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args);
+ adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
+ if (args.v3.sOutput.ucRefDiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_REF_DIV;
+ amdgpu_crtc->pll_reference_div = args.v3.sOutput.ucRefDiv;
+ }
+ if (args.v3.sOutput.ucPostDiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_POST_DIV;
+ amdgpu_crtc->pll_post_div = args.v3.sOutput.ucPostDiv;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return adjusted_clock;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return adjusted_clock;
+ }
+
+ return adjusted_clock;
+}
+
+union set_pixel_clock {
+ SET_PIXEL_CLOCK_PS_ALLOCATION base;
+ PIXEL_CLOCK_PARAMETERS v1;
+ PIXEL_CLOCK_PARAMETERS_V2 v2;
+ PIXEL_CLOCK_PARAMETERS_V3 v3;
+ PIXEL_CLOCK_PARAMETERS_V5 v5;
+ PIXEL_CLOCK_PARAMETERS_V6 v6;
+ PIXEL_CLOCK_PARAMETERS_V7 v7;
+};
+
+/* on DCE5, make sure the voltage is high enough to support the
+ * required disp clk.
+ */
+void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ u32 dispclk)
+{
+ u8 frev, crev;
+ int index;
+ union set_pixel_clock args;
+
+ memset(&args, 0, sizeof(args));
+
+ index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 5:
+ /* if the default dcpll clock is specified,
+ * SetPixelClock provides the dividers
+ */
+ args.v5.ucCRTC = ATOM_CRTC_INVALID;
+ args.v5.usPixelClock = cpu_to_le16(dispclk);
+ args.v5.ucPpll = ATOM_DCPLL;
+ break;
+ case 6:
+ /* if the default dcpll clock is specified,
+ * SetPixelClock provides the dividers
+ */
+ args.v6.ulDispEngClkFreq = cpu_to_le32(dispclk);
+ if (adev->asic_type == CHIP_TAHITI ||
+ adev->asic_type == CHIP_PITCAIRN ||
+ adev->asic_type == CHIP_VERDE ||
+ adev->asic_type == CHIP_OLAND)
+ args.v6.ucPpll = ATOM_PPLL0;
+ else
+ args.v6.ucPpll = ATOM_EXT_PLL1;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+union set_dce_clock {
+ SET_DCE_CLOCK_PS_ALLOCATION_V1_1 v1_1;
+ SET_DCE_CLOCK_PS_ALLOCATION_V2_1 v2_1;
+};
+
+u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
+ u32 freq, u8 clk_type, u8 clk_src)
+{
+ u8 frev, crev;
+ int index;
+ union set_dce_clock args;
+ u32 ret_freq = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ index = GetIndexIntoMasterTable(COMMAND, SetDCEClock);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return 0;
+
+ switch (frev) {
+ case 2:
+ switch (crev) {
+ case 1:
+ args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
+ args.v2_1.asParam.ucDCEClkType = clk_type;
+ args.v2_1.asParam.ucDCEClkSrc = clk_src;
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return 0;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return 0;
+ }
+
+ return ret_freq;
+}
+
+static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id)
+{
+ if (ENCODER_MODE_IS_DP(encoder_mode)) {
+ if (pll_id < ATOM_EXT_PLL1)
+ return true;
+ else
+ return false;
+ } else {
+ return true;
+ }
+}
+
+void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ u32 crtc_id,
+ int pll_id,
+ u32 encoder_mode,
+ u32 encoder_id,
+ u32 clock,
+ u32 ref_div,
+ u32 fb_div,
+ u32 frac_fb_div,
+ u32 post_div,
+ int bpc,
+ bool ss_enabled,
+ struct amdgpu_atom_ss *ss)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u8 frev, crev;
+ int index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+ union set_pixel_clock args;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ if (clock == ATOM_DISABLE)
+ return;
+ args.v1.usPixelClock = cpu_to_le16(clock / 10);
+ args.v1.usRefDiv = cpu_to_le16(ref_div);
+ args.v1.usFbDiv = cpu_to_le16(fb_div);
+ args.v1.ucFracFbDiv = frac_fb_div;
+ args.v1.ucPostDiv = post_div;
+ args.v1.ucPpll = pll_id;
+ args.v1.ucCRTC = crtc_id;
+ args.v1.ucRefDivSrc = 1;
+ break;
+ case 2:
+ args.v2.usPixelClock = cpu_to_le16(clock / 10);
+ args.v2.usRefDiv = cpu_to_le16(ref_div);
+ args.v2.usFbDiv = cpu_to_le16(fb_div);
+ args.v2.ucFracFbDiv = frac_fb_div;
+ args.v2.ucPostDiv = post_div;
+ args.v2.ucPpll = pll_id;
+ args.v2.ucCRTC = crtc_id;
+ args.v2.ucRefDivSrc = 1;
+ break;
+ case 3:
+ args.v3.usPixelClock = cpu_to_le16(clock / 10);
+ args.v3.usRefDiv = cpu_to_le16(ref_div);
+ args.v3.usFbDiv = cpu_to_le16(fb_div);
+ args.v3.ucFracFbDiv = frac_fb_div;
+ args.v3.ucPostDiv = post_div;
+ args.v3.ucPpll = pll_id;
+ if (crtc_id == ATOM_CRTC2)
+ args.v3.ucMiscInfo = PIXEL_CLOCK_MISC_CRTC_SEL_CRTC2;
+ else
+ args.v3.ucMiscInfo = PIXEL_CLOCK_MISC_CRTC_SEL_CRTC1;
+ if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
+ args.v3.ucMiscInfo |= PIXEL_CLOCK_MISC_REF_DIV_SRC;
+ args.v3.ucTransmitterId = encoder_id;
+ args.v3.ucEncoderMode = encoder_mode;
+ break;
+ case 5:
+ args.v5.ucCRTC = crtc_id;
+ args.v5.usPixelClock = cpu_to_le16(clock / 10);
+ args.v5.ucRefDiv = ref_div;
+ args.v5.usFbDiv = cpu_to_le16(fb_div);
+ args.v5.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
+ args.v5.ucPostDiv = post_div;
+ args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */
+ if ((ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) &&
+ (pll_id < ATOM_EXT_PLL1))
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_REF_DIV_SRC;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP;
+ break;
+ case 10:
+ /* yes this is correct, the atom define is wrong */
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_32BPP;
+ break;
+ case 12:
+ /* yes this is correct, the atom define is wrong */
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP;
+ break;
+ }
+ }
+ args.v5.ucTransmitterID = encoder_id;
+ args.v5.ucEncoderMode = encoder_mode;
+ args.v5.ucPpll = pll_id;
+ break;
+ case 6:
+ args.v6.ulDispEngClkFreq = cpu_to_le32(crtc_id << 24 | clock / 10);
+ args.v6.ucRefDiv = ref_div;
+ args.v6.usFbDiv = cpu_to_le16(fb_div);
+ args.v6.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
+ args.v6.ucPostDiv = post_div;
+ args.v6.ucMiscInfo = 0; /* HDMI depth, etc. */
+ if ((ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) &&
+ (pll_id < ATOM_EXT_PLL1) &&
+ !is_pixel_clock_source_from_pll(encoder_mode, pll_id))
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_REF_DIV_SRC;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP;
+ break;
+ case 10:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP_V6;
+ break;
+ case 12:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP_V6;
+ break;
+ case 16:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP;
+ break;
+ }
+ }
+ args.v6.ucTransmitterID = encoder_id;
+ args.v6.ucEncoderMode = encoder_mode;
+ args.v6.ucPpll = pll_id;
+ break;
+ case 7:
+ args.v7.ulPixelClock = cpu_to_le32(clock * 10); /* 100 hz units */
+ args.v7.ucMiscInfo = 0;
+ if ((encoder_mode == ATOM_ENCODER_MODE_DVI) &&
+ (clock > 165000))
+ args.v7.ucMiscInfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN;
+ args.v7.ucCRTC = crtc_id;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS;
+ break;
+ case 10:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4;
+ break;
+ case 12:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2;
+ break;
+ case 16:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1;
+ break;
+ }
+ }
+ args.v7.ucTransmitterID = encoder_id;
+ args.v7.ucEncoderMode = encoder_mode;
+ args.v7.ucPpll = pll_id;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
+
+ amdgpu_crtc->bpc = 8;
+ amdgpu_crtc->ss_enabled = false;
+
+ if ((amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(amdgpu_crtc->encoder) != ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector =
+ amdgpu_get_connector_for_encoder(amdgpu_crtc->encoder);
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+ int dp_clock;
+
+ /* Assign mode clock for hdmi deep color max clock limit check */
+ amdgpu_connector->pixelclock_for_modeset = mode->clock;
+ amdgpu_crtc->bpc = amdgpu_connector_get_monitor_bpc(connector);
+
+ switch (encoder_mode) {
+ case ATOM_ENCODER_MODE_DP_MST:
+ case ATOM_ENCODER_MODE_DP:
+ /* DP/eDP */
+ dp_clock = dig_connector->dp_clock / 10;
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev, &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_DP,
+ dp_clock);
+ break;
+ case ATOM_ENCODER_MODE_LVDS:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ dig->lcd_ss_id,
+ mode->clock / 10);
+ break;
+ case ATOM_ENCODER_MODE_DVI:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_TMDS,
+ mode->clock / 10);
+ break;
+ case ATOM_ENCODER_MODE_HDMI:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_HDMI,
+ mode->clock / 10);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* adjust pixel clock as needed */
+ amdgpu_crtc->adjusted_clock = amdgpu_atombios_crtc_adjust_pll(crtc, mode);
+
+ return 0;
+}
+
+void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ u32 pll_clock = mode->clock;
+ u32 clock = mode->clock;
+ u32 ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0;
+ struct amdgpu_pll *pll;
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
+
+ /* pass the actual clock to amdgpu_atombios_crtc_program_pll for HDMI */
+ if ((encoder_mode == ATOM_ENCODER_MODE_HDMI) &&
+ (amdgpu_crtc->bpc > 8))
+ clock = amdgpu_crtc->adjusted_clock;
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ pll = &adev->clock.ppll[0];
+ break;
+ case ATOM_PPLL2:
+ pll = &adev->clock.ppll[1];
+ break;
+ case ATOM_PPLL0:
+ case ATOM_PPLL_INVALID:
+ default:
+ pll = &adev->clock.ppll[2];
+ break;
+ }
+
+ /* update pll params */
+ pll->flags = amdgpu_crtc->pll_flags;
+ pll->reference_div = amdgpu_crtc->pll_reference_div;
+ pll->post_div = amdgpu_crtc->pll_post_div;
+
+ amdgpu_pll_compute(adev, pll, amdgpu_crtc->adjusted_clock, &pll_clock,
+ &fb_div, &frac_fb_div, &ref_div, &post_div);
+
+ amdgpu_atombios_crtc_program_ss(adev, ATOM_DISABLE, amdgpu_crtc->pll_id,
+ amdgpu_crtc->crtc_id, &amdgpu_crtc->ss);
+
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ encoder_mode, amdgpu_encoder->encoder_id, clock,
+ ref_div, fb_div, frac_fb_div, post_div,
+ amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
+
+ if (amdgpu_crtc->ss_enabled) {
+ /* calculate ss amount and step size */
+ u32 step_size;
+ u32 amount = (((fb_div * 10) + frac_fb_div) *
+ (u32)amdgpu_crtc->ss.percentage) /
+ (100 * (u32)amdgpu_crtc->ss.percentage_divider);
+ amdgpu_crtc->ss.amount = (amount / 10) & ATOM_PPLL_SS_AMOUNT_V2_FBDIV_MASK;
+ amdgpu_crtc->ss.amount |= ((amount - (amount / 10)) << ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT) &
+ ATOM_PPLL_SS_AMOUNT_V2_NFRAC_MASK;
+ if (amdgpu_crtc->ss.type & ATOM_PPLL_SS_TYPE_V2_CENTRE_SPREAD)
+ step_size = (4 * amount * ref_div * ((u32)amdgpu_crtc->ss.rate * 2048)) /
+ (125 * 25 * pll->reference_freq / 100);
+ else
+ step_size = (2 * amount * ref_div * ((u32)amdgpu_crtc->ss.rate * 2048)) /
+ (125 * 25 * pll->reference_freq / 100);
+ amdgpu_crtc->ss.step = step_size;
+
+ amdgpu_atombios_crtc_program_ss(adev, ATOM_ENABLE, amdgpu_crtc->pll_id,
+ amdgpu_crtc->crtc_id, &amdgpu_crtc->ss);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
new file mode 100644
index 000000000..0eeda8e3b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_CRTC_H__
+#define __ATOMBIOS_CRTC_H__
+
+void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc);
+void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock);
+void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev);
+void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ u32 dispclk);
+u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
+ u32 freq, u8 clk_type, u8 clk_src);
+void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ u32 crtc_id,
+ int pll_id,
+ u32 encoder_mode,
+ u32 encoder_id,
+ u32 clock,
+ u32 ref_div,
+ u32 fb_div,
+ u32 frac_fb_div,
+ u32 post_div,
+ int bpc,
+ bool ss_enabled,
+ struct amdgpu_atom_ss *ss);
+int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
new file mode 100644
index 000000000..87c41e0e9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -0,0 +1,770 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/display/drm_dp_helper.h>
+
+#include "amdgpu.h"
+
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_atombios.h"
+
+/* move these to drm_dp_helper.c/h */
+#define DP_LINK_CONFIGURATION_SIZE 9
+#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
+
+static char *voltage_names[] = {
+ "0.4V", "0.6V", "0.8V", "1.2V"
+};
+static char *pre_emph_names[] = {
+ "0dB", "3.5dB", "6dB", "9.5dB"
+};
+
+/***** amdgpu AUX functions *****/
+
+union aux_channel_transaction {
+ PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
+ PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
+};
+
+static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
+ u8 *send, int send_bytes,
+ u8 *recv, int recv_size,
+ u8 delay, u8 *ack)
+{
+ struct drm_device *dev = chan->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union aux_channel_transaction args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
+ unsigned char *base;
+ int recv_bytes;
+ int r = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ mutex_lock(&chan->mutex);
+
+ base = (unsigned char *)(adev->mode_info.atom_context->scratch + 1);
+
+ amdgpu_atombios_copy_swap(base, send, send_bytes, true);
+
+ args.v2.lpAuxRequest = cpu_to_le16((u16)(0 + 4));
+ args.v2.lpDataOut = cpu_to_le16((u16)(16 + 4));
+ args.v2.ucDataOutLen = 0;
+ args.v2.ucChannelID = chan->rec.i2c_id;
+ args.v2.ucDelay = delay / 10;
+ args.v2.ucHPD_ID = chan->rec.hpd;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ *ack = args.v2.ucReplyStatus;
+
+ /* timeout */
+ if (args.v2.ucReplyStatus == 1) {
+ r = -ETIMEDOUT;
+ goto done;
+ }
+
+ /* flags not zero */
+ if (args.v2.ucReplyStatus == 2) {
+ DRM_DEBUG_KMS("dp_aux_ch flags not zero\n");
+ r = -EIO;
+ goto done;
+ }
+
+ /* error */
+ if (args.v2.ucReplyStatus == 3) {
+ DRM_DEBUG_KMS("dp_aux_ch error\n");
+ r = -EIO;
+ goto done;
+ }
+
+ recv_bytes = args.v1.ucDataOutLen;
+ if (recv_bytes > recv_size)
+ recv_bytes = recv_size;
+
+ if (recv && recv_size)
+ amdgpu_atombios_copy_swap(recv, base + 16, recv_bytes, false);
+
+ r = recv_bytes;
+done:
+ mutex_unlock(&chan->mutex);
+
+ return r;
+}
+
+#define BARE_ADDRESS_SIZE 3
+#define HEADER_SIZE (BARE_ADDRESS_SIZE + 1)
+
+static ssize_t
+amdgpu_atombios_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
+{
+ struct amdgpu_i2c_chan *chan =
+ container_of(aux, struct amdgpu_i2c_chan, aux);
+ int ret;
+ u8 tx_buf[20];
+ size_t tx_size;
+ u8 ack, delay = 0;
+
+ if (WARN_ON(msg->size > 16))
+ return -E2BIG;
+
+ tx_buf[0] = msg->address & 0xff;
+ tx_buf[1] = msg->address >> 8;
+ tx_buf[2] = (msg->request << 4) |
+ ((msg->address >> 16) & 0xf);
+ tx_buf[3] = msg->size ? (msg->size - 1) : 0;
+
+ switch (msg->request & ~DP_AUX_I2C_MOT) {
+ case DP_AUX_NATIVE_WRITE:
+ case DP_AUX_I2C_WRITE:
+ /* tx_size needs to be 4 even for bare address packets since the atom
+ * table needs the info in tx_buf[3].
+ */
+ tx_size = HEADER_SIZE + msg->size;
+ if (msg->size == 0)
+ tx_buf[3] |= BARE_ADDRESS_SIZE << 4;
+ else
+ tx_buf[3] |= tx_size << 4;
+ memcpy(tx_buf + HEADER_SIZE, msg->buffer, msg->size);
+ ret = amdgpu_atombios_dp_process_aux_ch(chan,
+ tx_buf, tx_size, NULL, 0, delay, &ack);
+ if (ret >= 0)
+ /* Return payload size. */
+ ret = msg->size;
+ break;
+ case DP_AUX_NATIVE_READ:
+ case DP_AUX_I2C_READ:
+ /* tx_size needs to be 4 even for bare address packets since the atom
+ * table needs the info in tx_buf[3].
+ */
+ tx_size = HEADER_SIZE;
+ if (msg->size == 0)
+ tx_buf[3] |= BARE_ADDRESS_SIZE << 4;
+ else
+ tx_buf[3] |= tx_size << 4;
+ ret = amdgpu_atombios_dp_process_aux_ch(chan,
+ tx_buf, tx_size, msg->buffer, msg->size, delay, &ack);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret >= 0)
+ msg->reply = ack >> 4;
+
+ return ret;
+}
+
+void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector)
+{
+ amdgpu_connector->ddc_bus->rec.hpd = amdgpu_connector->hpd.hpd;
+ amdgpu_connector->ddc_bus->aux.transfer = amdgpu_atombios_dp_aux_transfer;
+ amdgpu_connector->ddc_bus->aux.drm_dev = amdgpu_connector->base.dev;
+
+ drm_dp_aux_init(&amdgpu_connector->ddc_bus->aux);
+ amdgpu_connector->ddc_bus->has_aux = true;
+}
+
+/***** general DP utility functions *****/
+
+#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3
+#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPH_LEVEL_3
+
+static void amdgpu_atombios_dp_get_adjust_train(const u8 link_status[DP_LINK_STATUS_SIZE],
+ int lane_count,
+ u8 train_set[4])
+{
+ u8 v = 0;
+ u8 p = 0;
+ int lane;
+
+ for (lane = 0; lane < lane_count; lane++) {
+ u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+ u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
+
+ DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n",
+ lane,
+ voltage_names[this_v >> DP_TRAIN_VOLTAGE_SWING_SHIFT],
+ pre_emph_names[this_p >> DP_TRAIN_PRE_EMPHASIS_SHIFT]);
+
+ if (this_v > v)
+ v = this_v;
+ if (this_p > p)
+ p = this_p;
+ }
+
+ if (v >= DP_VOLTAGE_MAX)
+ v |= DP_TRAIN_MAX_SWING_REACHED;
+
+ if (p >= DP_PRE_EMPHASIS_MAX)
+ p |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+ DRM_DEBUG_KMS("using signal parameters: voltage %s pre_emph %s\n",
+ voltage_names[(v & DP_TRAIN_VOLTAGE_SWING_MASK) >> DP_TRAIN_VOLTAGE_SWING_SHIFT],
+ pre_emph_names[(p & DP_TRAIN_PRE_EMPHASIS_MASK) >> DP_TRAIN_PRE_EMPHASIS_SHIFT]);
+
+ for (lane = 0; lane < 4; lane++)
+ train_set[lane] = v | p;
+}
+
+/* convert bits per color to bits per pixel */
+/* get bpc from the EDID */
+static unsigned amdgpu_atombios_dp_convert_bpc_to_bpp(int bpc)
+{
+ if (bpc == 0)
+ return 24;
+ else
+ return bpc * 3;
+}
+
+/***** amdgpu specific DP functions *****/
+
+static int amdgpu_atombios_dp_get_dp_link_config(struct drm_connector *connector,
+ const u8 dpcd[DP_DPCD_SIZE],
+ unsigned pix_clock,
+ unsigned *dp_lanes, unsigned *dp_rate)
+{
+ unsigned bpp =
+ amdgpu_atombios_dp_convert_bpc_to_bpp(amdgpu_connector_get_monitor_bpc(connector));
+ static const unsigned link_rates[3] = { 162000, 270000, 540000 };
+ unsigned max_link_rate = drm_dp_max_link_rate(dpcd);
+ unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
+ unsigned lane_num, i, max_pix_clock;
+
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+ ENCODER_OBJECT_ID_NUTMEG) {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ max_pix_clock = (lane_num * 270000 * 8) / bpp;
+ if (max_pix_clock >= pix_clock) {
+ *dp_lanes = lane_num;
+ *dp_rate = 270000;
+ return 0;
+ }
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+ if (max_pix_clock >= pix_clock) {
+ *dp_lanes = lane_num;
+ *dp_rate = link_rates[i];
+ return 0;
+ }
+ }
+ }
+ }
+
+ return -EINVAL;
+}
+
+static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
+ int action, int dp_clock,
+ u8 ucconfig, u8 lane_num)
+{
+ DP_ENCODER_SERVICE_PARAMETERS args;
+ int index = GetIndexIntoMasterTable(COMMAND, DPEncoderService);
+
+ memset(&args, 0, sizeof(args));
+ args.ucLinkClock = dp_clock / 10;
+ args.ucConfig = ucconfig;
+ args.ucAction = action;
+ args.ucLaneNum = lane_num;
+ args.ucStatus = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ return args.ucStatus;
+}
+
+u8 amdgpu_atombios_dp_get_sinktype(struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_device *dev = amdgpu_connector->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ return amdgpu_atombios_dp_encoder_service(adev, ATOM_DP_ACTION_GET_SINK_TYPE, 0,
+ amdgpu_connector->ddc_bus->rec.i2c_id, 0);
+}
+
+static void amdgpu_atombios_dp_probe_oui(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ u8 buf[3];
+
+ if (!(dig_connector->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
+ return;
+
+ if (drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_SINK_OUI, buf, 3) == 3)
+ DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
+ buf[0], buf[1], buf[2]);
+
+ if (drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_BRANCH_OUI, buf, 3) == 3)
+ DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
+ buf[0], buf[1], buf[2]);
+}
+
+static void amdgpu_atombios_dp_ds_ports(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ int ret;
+
+ if (dig_connector->dpcd[DP_DPCD_REV] > 0x10) {
+ ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux,
+ DP_DOWNSTREAM_PORT_0,
+ dig_connector->downstream_ports,
+ DP_MAX_DOWNSTREAM_PORTS);
+ if (ret)
+ memset(dig_connector->downstream_ports, 0,
+ DP_MAX_DOWNSTREAM_PORTS);
+ }
+}
+
+int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ u8 msg[DP_DPCD_SIZE];
+ int ret;
+
+ ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_DPCD_REV,
+ msg, DP_DPCD_SIZE);
+ if (ret == DP_DPCD_SIZE) {
+ memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
+
+ DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd),
+ dig_connector->dpcd);
+
+ amdgpu_atombios_dp_probe_oui(amdgpu_connector);
+ amdgpu_atombios_dp_ds_ports(amdgpu_connector);
+ return 0;
+ }
+
+ dig_connector->dpcd[0] = 0;
+ return -EINVAL;
+}
+
+int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector);
+ u8 tmp;
+
+ if (!amdgpu_connector->con_priv)
+ return panel_mode;
+
+ if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
+ /* DP bridge chips */
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
+ DP_EDP_CONFIGURATION_CAP, &tmp) == 1) {
+ if (tmp & 1)
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+ else if ((dp_bridge == ENCODER_OBJECT_ID_NUTMEG) ||
+ (dp_bridge == ENCODER_OBJECT_ID_TRAVIS))
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE;
+ else
+ panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ }
+ } else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ /* eDP */
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
+ DP_EDP_CONFIGURATION_CAP, &tmp) == 1) {
+ if (tmp & 1)
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+ }
+ }
+
+ return panel_mode;
+}
+
+void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ int ret;
+
+ if (!amdgpu_connector->con_priv)
+ return;
+ dig_connector = amdgpu_connector->con_priv;
+
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ ret = amdgpu_atombios_dp_get_dp_link_config(connector, dig_connector->dpcd,
+ mode->clock,
+ &dig_connector->dp_lane_count,
+ &dig_connector->dp_clock);
+ if (ret) {
+ dig_connector->dp_clock = 0;
+ dig_connector->dp_lane_count = 0;
+ }
+ }
+}
+
+int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ unsigned dp_lanes, dp_clock;
+ int ret;
+
+ if (!amdgpu_connector->con_priv)
+ return MODE_CLOCK_HIGH;
+ dig_connector = amdgpu_connector->con_priv;
+
+ ret = amdgpu_atombios_dp_get_dp_link_config(connector, dig_connector->dpcd,
+ mode->clock, &dp_lanes, &dp_clock);
+ if (ret)
+ return MODE_CLOCK_HIGH;
+
+ if ((dp_clock == 540000) &&
+ (!amdgpu_connector_is_dp12_capable(connector)))
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector)
+{
+ u8 link_status[DP_LINK_STATUS_SIZE];
+ struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
+
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status)
+ <= 0)
+ return false;
+ if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
+ return false;
+ return true;
+}
+
+void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
+ u8 power_state)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ if (!amdgpu_connector->con_priv)
+ return;
+
+ dig_connector = amdgpu_connector->con_priv;
+
+ /* power up/down the sink */
+ if (dig_connector->dpcd[0] >= 0x11) {
+ drm_dp_dpcd_writeb(&amdgpu_connector->ddc_bus->aux,
+ DP_SET_POWER, power_state);
+ usleep_range(1000, 2000);
+ }
+}
+
+struct amdgpu_atombios_dp_link_train_info {
+ struct amdgpu_device *adev;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ int dp_clock;
+ int dp_lane_count;
+ bool tp3_supported;
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
+ u8 train_set[4];
+ u8 link_status[DP_LINK_STATUS_SIZE];
+ u8 tries;
+ struct drm_dp_aux *aux;
+};
+
+static void
+amdgpu_atombios_dp_update_vs_emph(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ /* set the initial vs/emph on the source */
+ amdgpu_atombios_encoder_setup_dig_transmitter(dp_info->encoder,
+ ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH,
+ 0, dp_info->train_set[0]); /* sets all lanes at once */
+
+ /* set the vs/emph on the sink */
+ drm_dp_dpcd_write(dp_info->aux, DP_TRAINING_LANE0_SET,
+ dp_info->train_set, dp_info->dp_lane_count);
+}
+
+static void
+amdgpu_atombios_dp_set_tp(struct amdgpu_atombios_dp_link_train_info *dp_info, int tp)
+{
+ int rtp = 0;
+
+ /* set training pattern on the source */
+ switch (tp) {
+ case DP_TRAINING_PATTERN_1:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1;
+ break;
+ case DP_TRAINING_PATTERN_2:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2;
+ break;
+ case DP_TRAINING_PATTERN_3:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3;
+ break;
+ }
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder, rtp, 0);
+
+ /* enable training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux, DP_TRAINING_PATTERN_SET, tp);
+}
+
+static int
+amdgpu_atombios_dp_link_train_init(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(dp_info->encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u8 tmp;
+
+ /* power up the sink */
+ amdgpu_atombios_dp_set_rx_power_state(dp_info->connector, DP_SET_POWER_D0);
+
+ /* possibly enable downspread on the sink */
+ if (dp_info->dpcd[3] & 0x1)
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_DOWNSPREAD_CTRL, DP_SPREAD_AMP_0_5);
+ else
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_DOWNSPREAD_CTRL, 0);
+
+ if (dig->panel_mode == DP_PANEL_MODE_INTERNAL_DP2_MODE)
+ drm_dp_dpcd_writeb(dp_info->aux, DP_EDP_CONFIGURATION_SET, 1);
+
+ /* set the lane count on the sink */
+ tmp = dp_info->dp_lane_count;
+ if (drm_dp_enhanced_frame_cap(dp_info->dpcd))
+ tmp |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+ drm_dp_dpcd_writeb(dp_info->aux, DP_LANE_COUNT_SET, tmp);
+
+ /* set the link rate on the sink */
+ tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock);
+ drm_dp_dpcd_writeb(dp_info->aux, DP_LINK_BW_SET, tmp);
+
+ /* start training on the source */
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder,
+ ATOM_ENCODER_CMD_DP_LINK_TRAINING_START, 0);
+
+ /* disable the training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_TRAINING_PATTERN_SET,
+ DP_TRAINING_PATTERN_DISABLE);
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_dp_link_train_finish(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ udelay(400);
+
+ /* disable the training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_TRAINING_PATTERN_SET,
+ DP_TRAINING_PATTERN_DISABLE);
+
+ /* disable the training pattern on the source */
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder,
+ ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE, 0);
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ bool clock_recovery;
+ u8 voltage;
+ int i;
+
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_1);
+ memset(dp_info->train_set, 0, 4);
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+
+ udelay(400);
+
+ /* clock recovery loop */
+ clock_recovery = false;
+ dp_info->tries = 0;
+ voltage = 0xff;
+ while (1) {
+ drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
+
+ if (drm_dp_dpcd_read_link_status(dp_info->aux,
+ dp_info->link_status) <= 0) {
+ DRM_ERROR("displayport link status failed\n");
+ break;
+ }
+
+ if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ clock_recovery = true;
+ break;
+ }
+
+ for (i = 0; i < dp_info->dp_lane_count; i++) {
+ if ((dp_info->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
+ break;
+ }
+ if (i == dp_info->dp_lane_count) {
+ DRM_ERROR("clock recovery reached max voltage\n");
+ break;
+ }
+
+ if ((dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
+ ++dp_info->tries;
+ if (dp_info->tries == 5) {
+ DRM_ERROR("clock recovery tried 5 times\n");
+ break;
+ }
+ } else
+ dp_info->tries = 0;
+
+ voltage = dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+
+ /* Compute new train_set as requested by sink */
+ amdgpu_atombios_dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count,
+ dp_info->train_set);
+
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+ }
+ if (!clock_recovery) {
+ DRM_ERROR("clock recovery failed\n");
+ return -1;
+ } else {
+ DRM_DEBUG_KMS("clock recovery at voltage %d pre-emphasis %d\n",
+ dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+ (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK) >>
+ DP_TRAIN_PRE_EMPHASIS_SHIFT);
+ return 0;
+ }
+}
+
+static int
+amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ bool channel_eq;
+
+ if (dp_info->tp3_supported)
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_3);
+ else
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_2);
+
+ /* channel equalization loop */
+ dp_info->tries = 0;
+ channel_eq = false;
+ while (1) {
+ drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
+
+ if (drm_dp_dpcd_read_link_status(dp_info->aux,
+ dp_info->link_status) <= 0) {
+ DRM_ERROR("displayport link status failed\n");
+ break;
+ }
+
+ if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ channel_eq = true;
+ break;
+ }
+
+ /* Try 5 times */
+ if (dp_info->tries > 5) {
+ DRM_ERROR("channel eq failed: 5 tries\n");
+ break;
+ }
+
+ /* Compute new train_set as requested by sink */
+ amdgpu_atombios_dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count,
+ dp_info->train_set);
+
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+ dp_info->tries++;
+ }
+
+ if (!channel_eq) {
+ DRM_ERROR("channel eq failed\n");
+ return -1;
+ } else {
+ DRM_DEBUG_KMS("channel eq at voltage %d pre-emphasis %d\n",
+ dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+ (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK)
+ >> DP_TRAIN_PRE_EMPHASIS_SHIFT);
+ return 0;
+ }
+}
+
+void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+ struct amdgpu_atombios_dp_link_train_info dp_info;
+ u8 tmp;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (!amdgpu_connector->con_priv)
+ return;
+ dig_connector = amdgpu_connector->con_priv;
+
+ if ((dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) &&
+ (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_eDP))
+ return;
+
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux, DP_MAX_LANE_COUNT, &tmp)
+ == 1) {
+ if (tmp & DP_TPS3_SUPPORTED)
+ dp_info.tp3_supported = true;
+ else
+ dp_info.tp3_supported = false;
+ } else {
+ dp_info.tp3_supported = false;
+ }
+
+ memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE);
+ dp_info.adev = adev;
+ dp_info.encoder = encoder;
+ dp_info.connector = connector;
+ dp_info.dp_lane_count = dig_connector->dp_lane_count;
+ dp_info.dp_clock = dig_connector->dp_clock;
+ dp_info.aux = &amdgpu_connector->ddc_bus->aux;
+
+ if (amdgpu_atombios_dp_link_train_init(&dp_info))
+ goto done;
+ if (amdgpu_atombios_dp_link_train_cr(&dp_info))
+ goto done;
+ if (amdgpu_atombios_dp_link_train_ce(&dp_info))
+ goto done;
+done:
+ if (amdgpu_atombios_dp_link_train_finish(&dp_info))
+ return;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
new file mode 100644
index 000000000..f59d85ead
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_DP_H__
+#define __ATOMBIOS_DP_H__
+
+void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector);
+u8 amdgpu_atombios_dp_get_sinktype(struct amdgpu_connector *amdgpu_connector);
+int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector);
+int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
+ const struct drm_display_mode *mode);
+int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
+ struct drm_display_mode *mode);
+bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
+void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
+ u8 power_state);
+void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
new file mode 100644
index 000000000..d95b2dc78
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -0,0 +1,2128 @@
+/*
+ * Copyright 2007-11 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <linux/pci.h>
+
+#include <acpi/video.h>
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include <linux/backlight.h>
+#include "bif/bif_4_1_d.h"
+
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
+{
+ u8 backlight_level;
+ u32 bios_2_scratch;
+
+ bios_2_scratch = RREG32(mmBIOS_SCRATCH_2);
+
+ backlight_level = ((bios_2_scratch & ATOM_S2_CURRENT_BL_LEVEL_MASK) >>
+ ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
+
+ return backlight_level;
+}
+
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level)
+{
+ u32 bios_2_scratch;
+
+ bios_2_scratch = RREG32(mmBIOS_SCRATCH_2);
+
+ bios_2_scratch &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ bios_2_scratch |= ((backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+ ATOM_S2_CURRENT_BL_LEVEL_MASK);
+
+ WREG32(mmBIOS_SCRATCH_2, bios_2_scratch);
+}
+
+u8
+amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return 0;
+
+ return amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+}
+
+void
+amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level)
+{
+ struct drm_encoder *encoder = &amdgpu_encoder->base;
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return;
+
+ if ((amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
+ amdgpu_encoder->enc_priv) {
+ dig = amdgpu_encoder->enc_priv;
+ dig->backlight_level = level;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, dig->backlight_level);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (dig->backlight_level == 0)
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+ else {
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_BL_BRIGHTNESS_CONTROL, 0, 0);
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
+{
+ u8 level;
+
+ /* Convert brightness to hardware level */
+ if (bd->props.brightness < 0)
+ level = 0;
+ else if (bd->props.brightness > AMDGPU_MAX_BL_LEVEL)
+ level = AMDGPU_MAX_BL_LEVEL;
+ else
+ level = bd->props.brightness;
+
+ return level;
+}
+
+static int amdgpu_atombios_encoder_update_backlight_status(struct backlight_device *bd)
+{
+ struct amdgpu_backlight_privdata *pdata = bl_get_data(bd);
+ struct amdgpu_encoder *amdgpu_encoder = pdata->encoder;
+
+ amdgpu_atombios_encoder_set_backlight_level(amdgpu_encoder,
+ amdgpu_atombios_encoder_backlight_level(bd));
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_encoder_get_backlight_brightness(struct backlight_device *bd)
+{
+ struct amdgpu_backlight_privdata *pdata = bl_get_data(bd);
+ struct amdgpu_encoder *amdgpu_encoder = pdata->encoder;
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ return amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+}
+
+static const struct backlight_ops amdgpu_atombios_encoder_backlight_ops = {
+ .get_brightness = amdgpu_atombios_encoder_get_backlight_brightness,
+ .update_status = amdgpu_atombios_encoder_update_backlight_status,
+};
+
+void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encoder,
+ struct drm_connector *drm_connector)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct backlight_device *bd;
+ struct backlight_properties props;
+ struct amdgpu_backlight_privdata *pdata;
+ struct amdgpu_encoder_atom_dig *dig;
+ char bl_name[16];
+
+ /* Mac laptops with multiple GPUs use the gmux driver for backlight
+ * so don't register a backlight device
+ */
+ if ((adev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
+ (adev->pdev->device == 0x6741))
+ return;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ goto register_acpi_backlight;
+
+ if (!acpi_video_backlight_use_native()) {
+ drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n");
+ goto register_acpi_backlight;
+ }
+
+ pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
+ if (!pdata) {
+ DRM_ERROR("Memory allocation failed\n");
+ goto error;
+ }
+
+ memset(&props, 0, sizeof(props));
+ props.max_brightness = AMDGPU_MAX_BL_LEVEL;
+ props.type = BACKLIGHT_RAW;
+ snprintf(bl_name, sizeof(bl_name),
+ "amdgpu_bl%d", dev->primary->index);
+ bd = backlight_device_register(bl_name, drm_connector->kdev,
+ pdata, &amdgpu_atombios_encoder_backlight_ops, &props);
+ if (IS_ERR(bd)) {
+ DRM_ERROR("Backlight registration failed\n");
+ goto error;
+ }
+
+ pdata->encoder = amdgpu_encoder;
+
+ dig = amdgpu_encoder->enc_priv;
+ dig->bl_dev = bd;
+
+ bd->props.brightness = amdgpu_atombios_encoder_get_backlight_brightness(bd);
+ bd->props.power = FB_BLANK_UNBLANK;
+ backlight_update_status(bd);
+
+ DRM_INFO("amdgpu atom DIG backlight initialized\n");
+
+ return;
+
+error:
+ kfree(pdata);
+ return;
+
+register_acpi_backlight:
+ /* Try registering an ACPI video backlight device instead. */
+ acpi_video_register_backlight();
+ return;
+}
+
+void
+amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct backlight_device *bd = NULL;
+ struct amdgpu_encoder_atom_dig *dig;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return;
+
+ dig = amdgpu_encoder->enc_priv;
+ bd = dig->bl_dev;
+ dig->bl_dev = NULL;
+
+ if (bd) {
+ struct amdgpu_legacy_backlight_privdata *pdata;
+
+ pdata = bl_get_data(bd);
+ backlight_device_unregister(bd);
+ kfree(pdata);
+
+ DRM_INFO("amdgpu atom LVDS backlight unloaded\n");
+ }
+}
+
+bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ /* set the active encoder to connector routing */
+ amdgpu_encoder_set_active_device(encoder);
+ drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+ /* hw bug */
+ if ((mode->flags & DRM_MODE_FLAG_INTERLACE)
+ && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
+ adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
+
+ /* vertical FP must be at least 1 */
+ if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+ adjusted_mode->crtc_vsync_start++;
+
+ /* get the native mode for scaling */
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_panel_mode_fixup(encoder, adjusted_mode);
+ else if (amdgpu_encoder->rmx_type != RMX_OFF)
+ amdgpu_panel_mode_fixup(encoder, adjusted_mode);
+
+ if ((amdgpu_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ amdgpu_atombios_dp_set_link_config(connector, adjusted_mode);
+ }
+
+ return true;
+}
+
+static void
+amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ DAC_ENCODER_CONTROL_PS_ALLOCATION args;
+ int index = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ index = GetIndexIntoMasterTable(COMMAND, DAC1EncoderControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ index = GetIndexIntoMasterTable(COMMAND, DAC2EncoderControl);
+ break;
+ }
+
+ args.ucAction = action;
+ args.ucDacStandard = ATOM_DAC1_PS2;
+ args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static u8 amdgpu_atombios_encoder_get_bpc(struct drm_encoder *encoder)
+{
+ int bpc = 8;
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ switch (bpc) {
+ case 0:
+ return PANEL_BPC_UNDEFINE;
+ case 6:
+ return PANEL_6BIT_PER_COLOR;
+ case 8:
+ default:
+ return PANEL_8BIT_PER_COLOR;
+ case 10:
+ return PANEL_10BIT_PER_COLOR;
+ case 12:
+ return PANEL_12BIT_PER_COLOR;
+ case 16:
+ return PANEL_16BIT_PER_COLOR;
+ }
+}
+
+union dvo_encoder_control {
+ ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION ext_tmds;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION dvo;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION_V3 dvo_v3;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION_V1_4 dvo_v4;
+};
+
+static void
+amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ union dvo_encoder_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+ uint8_t frev, crev;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ /* R4xx, R5xx */
+ args.ext_tmds.sXTmdsEncoder.ucEnable = action;
+
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.ext_tmds.sXTmdsEncoder.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+
+ args.ext_tmds.sXTmdsEncoder.ucMisc |= ATOM_PANEL_MISC_888RGB;
+ break;
+ case 2:
+ /* RS600/690/740 */
+ args.dvo.sDVOEncoder.ucAction = action;
+ args.dvo.sDVOEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ /* DFP1, CRT1, TV1 depending on the type of port */
+ args.dvo.sDVOEncoder.ucDeviceType = ATOM_DEVICE_DFP1_INDEX;
+
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.dvo.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute |= PANEL_ENCODER_MISC_DUAL;
+ break;
+ case 3:
+ /* R6xx */
+ args.dvo_v3.ucAction = action;
+ args.dvo_v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.dvo_v3.ucDVOConfig = 0; /* XXX */
+ break;
+ case 4:
+ /* DCE8 */
+ args.dvo_v4.ucAction = action;
+ args.dvo_v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.dvo_v4.ucDVOConfig = 0; /* XXX */
+ args.dvo_v4.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ /* dp bridges are always DP */
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)
+ return ATOM_ENCODER_MODE_DP;
+
+ /* DVO is always DVO */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DVO1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1))
+ return ATOM_ENCODER_MODE_DVO;
+
+ connector = amdgpu_get_connector_for_encoder(encoder);
+ /* if we don't have an active device yet, just use one of
+ * the connectors tied to the encoder.
+ */
+ if (!connector)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ amdgpu_connector = to_amdgpu_connector(connector);
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */
+ if (amdgpu_audio != 0) {
+ if (amdgpu_connector->use_digital &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE))
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (amdgpu_connector->use_digital)
+ return ATOM_ENCODER_MODE_DVI;
+ else
+ return ATOM_ENCODER_MODE_CRT;
+ } else if (amdgpu_connector->use_digital) {
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_CRT;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ default:
+ if (amdgpu_audio != 0) {
+ if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_DVI;
+ }
+ case DRM_MODE_CONNECTOR_LVDS:
+ return ATOM_ENCODER_MODE_LVDS;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ return ATOM_ENCODER_MODE_DP;
+ } else if (amdgpu_audio != 0) {
+ if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_DVI;
+ }
+ case DRM_MODE_CONNECTOR_eDP:
+ return ATOM_ENCODER_MODE_DP;
+ case DRM_MODE_CONNECTOR_DVIA:
+ case DRM_MODE_CONNECTOR_VGA:
+ return ATOM_ENCODER_MODE_CRT;
+ case DRM_MODE_CONNECTOR_Composite:
+ case DRM_MODE_CONNECTOR_SVIDEO:
+ case DRM_MODE_CONNECTOR_9PinDIN:
+ /* fix me */
+ return ATOM_ENCODER_MODE_TV;
+ }
+}
+
+/*
+ * DIG Encoder/Transmitter Setup
+ *
+ * DCE 6.0
+ * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B).
+ * Supports up to 6 digital outputs
+ * - 6 DIG encoder blocks.
+ * - DIG to PHY mapping is hardcoded
+ * DIG1 drives UNIPHY0 link A, A+B
+ * DIG2 drives UNIPHY0 link B
+ * DIG3 drives UNIPHY1 link A, A+B
+ * DIG4 drives UNIPHY1 link B
+ * DIG5 drives UNIPHY2 link A, A+B
+ * DIG6 drives UNIPHY2 link B
+ *
+ * Routing
+ * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links)
+ * Examples:
+ * crtc0 -> dig2 -> LVTMA links A+B -> TMDS/HDMI
+ * crtc1 -> dig1 -> UNIPHY0 link B -> DP
+ * crtc0 -> dig1 -> UNIPHY2 link A -> LVDS
+ * crtc1 -> dig2 -> UNIPHY1 link B+A -> TMDS/HDMI
+ */
+
+union dig_encoder_control {
+ DIG_ENCODER_CONTROL_PS_ALLOCATION v1;
+ DIG_ENCODER_CONTROL_PARAMETERS_V2 v2;
+ DIG_ENCODER_CONTROL_PARAMETERS_V3 v3;
+ DIG_ENCODER_CONTROL_PARAMETERS_V4 v4;
+ DIG_ENCODER_CONTROL_PARAMETERS_V5 v5;
+};
+
+void
+amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
+ int action, int panel_mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ union dig_encoder_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, DIGxEncoderControl);
+ uint8_t frev, crev;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int hpd_id = AMDGPU_HPD_NONE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ hpd_id = amdgpu_connector->hpd.hpd;
+ }
+
+ /* no dig encoder assigned */
+ if (dig->dig_encoder == -1)
+ return;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ args.v1.ucAction = action;
+ args.v1.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v3.ucPanelMode = panel_mode;
+ else
+ args.v1.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode))
+ args.v1.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.ucLaneNum = 8;
+ else
+ args.v1.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3;
+ break;
+ }
+ if (dig->linkb)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
+ else
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+ break;
+ case 2:
+ case 3:
+ args.v3.ucAction = action;
+ args.v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v3.ucPanelMode = panel_mode;
+ else
+ args.v3.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v3.ucEncoderMode))
+ args.v3.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.ucLaneNum = 8;
+ else
+ args.v3.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v3.ucEncoderMode) && (dp_clock == 270000))
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V3_DPLINKRATE_2_70GHZ;
+ args.v3.acConfig.ucDigSel = dig->dig_encoder;
+ args.v3.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ case 4:
+ args.v4.ucAction = action;
+ args.v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v4.ucPanelMode = panel_mode;
+ else
+ args.v4.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v4.ucEncoderMode))
+ args.v4.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.ucLaneNum = 8;
+ else
+ args.v4.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v4.ucEncoderMode)) {
+ if (dp_clock == 540000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ;
+ else if (dp_clock == 324000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_3_24GHZ;
+ else if (dp_clock == 270000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_2_70GHZ;
+ else
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_1_62GHZ;
+ }
+ args.v4.acConfig.ucDigSel = dig->dig_encoder;
+ args.v4.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v4.ucHPD_ID = 0;
+ else
+ args.v4.ucHPD_ID = hpd_id + 1;
+ break;
+ case 5:
+ switch (action) {
+ case ATOM_ENCODER_CMD_SETUP_PANEL_MODE:
+ args.v5.asDPPanelModeParam.ucAction = action;
+ args.v5.asDPPanelModeParam.ucPanelMode = panel_mode;
+ args.v5.asDPPanelModeParam.ucDigId = dig->dig_encoder;
+ break;
+ case ATOM_ENCODER_CMD_STREAM_SETUP:
+ args.v5.asStreamParam.ucAction = action;
+ args.v5.asStreamParam.ucDigId = dig->dig_encoder;
+ args.v5.asStreamParam.ucDigMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ if (ENCODER_MODE_IS_DP(args.v5.asStreamParam.ucDigMode))
+ args.v5.asStreamParam.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder,
+ amdgpu_encoder->pixel_clock))
+ args.v5.asStreamParam.ucLaneNum = 8;
+ else
+ args.v5.asStreamParam.ucLaneNum = 4;
+ args.v5.asStreamParam.ulPixelClock =
+ cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
+ args.v5.asStreamParam.ucBitPerColor =
+ amdgpu_atombios_encoder_get_bpc(encoder);
+ args.v5.asStreamParam.ucLinkRateIn270Mhz = dp_clock / 27000;
+ break;
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_START:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE:
+ case ATOM_ENCODER_CMD_DP_VIDEO_OFF:
+ case ATOM_ENCODER_CMD_DP_VIDEO_ON:
+ args.v5.asCmdParam.ucAction = action;
+ args.v5.asCmdParam.ucDigId = dig->dig_encoder;
+ break;
+ default:
+ DRM_ERROR("Unsupported action 0x%x\n", action);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+union dig_transmitter_control {
+ DIG_TRANSMITTER_CONTROL_PS_ALLOCATION v1;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 v6;
+};
+
+void
+amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action,
+ uint8_t lane_num, uint8_t lane_set)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ union dig_transmitter_control args;
+ int index = 0;
+ uint8_t frev, crev;
+ bool is_dp = false;
+ int pll_id = 0;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int connector_object_id = 0;
+ int dig_encoder = dig->dig_encoder;
+ int hpd_id = AMDGPU_HPD_NONE;
+
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ /* just needed to avoid bailing in the encoder check. the encoder
+ * isn't used for init
+ */
+ dig_encoder = 0;
+ } else
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ hpd_id = amdgpu_connector->hpd.hpd;
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ connector_object_id =
+ (amdgpu_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ }
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ pll_id = amdgpu_crtc->pll_id;
+ }
+
+ /* no dig encoder assigned */
+ if (dig_encoder == -1)
+ return;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)))
+ is_dp = true;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ index = GetIndexIntoMasterTable(COMMAND, LVTMATransmitterControl);
+ break;
+ }
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ args.v1.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v1.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v1.asMode.ucLaneSel = lane_num;
+ args.v1.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v1.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v1.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
+
+ if (dig_encoder)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
+ else
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
+
+ if (dig->linkb)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB;
+ else
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKA;
+
+ if (is_dp)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_COHERENT;
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_COHERENT;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_8LANE_LINK;
+ }
+ break;
+ case 2:
+ args.v2.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v2.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v2.asMode.ucLaneSel = lane_num;
+ args.v2.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v2.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v2.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v2.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ args.v2.acConfig.ucEncoderSel = dig_encoder;
+ if (dig->linkb)
+ args.v2.acConfig.ucLinkSel = 1;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v2.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v2.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v2.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp) {
+ args.v2.acConfig.fCoherentMode = 1;
+ args.v2.acConfig.fDPConnector = 1;
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v2.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v2.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 3:
+ args.v3.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v3.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v3.asMode.ucLaneSel = lane_num;
+ args.v3.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v3.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ if (is_dp)
+ args.v3.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.ucLaneNum = 8;
+ else
+ args.v3.ucLaneNum = 4;
+
+ if (dig->linkb)
+ args.v3.acConfig.ucLinkSel = 1;
+ if (dig_encoder & 1)
+ args.v3.acConfig.ucEncoderSel = 1;
+
+ /* Select the PLL for the PHY
+ * DP PHY should be clocked from external src if there is
+ * one.
+ */
+ /* On DCE4, if there is an external clock, it generates the DP ref clock */
+ if (is_dp && adev->clock.dp_extclk)
+ args.v3.acConfig.ucRefClkSource = 2; /* external src */
+ else
+ args.v3.acConfig.ucRefClkSource = pll_id;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v3.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v3.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v3.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp)
+ args.v3.acConfig.fCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v3.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 4:
+ args.v4.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v4.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v4.asMode.ucLaneSel = lane_num;
+ args.v4.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v4.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ if (is_dp)
+ args.v4.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.ucLaneNum = 8;
+ else
+ args.v4.ucLaneNum = 4;
+
+ if (dig->linkb)
+ args.v4.acConfig.ucLinkSel = 1;
+ if (dig_encoder & 1)
+ args.v4.acConfig.ucEncoderSel = 1;
+
+ /* Select the PLL for the PHY
+ * DP PHY should be clocked from external src if there is
+ * one.
+ */
+ /* On DCE5 DCPLL usually generates the DP ref clock */
+ if (is_dp) {
+ if (adev->clock.dp_extclk)
+ args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_EXTCLK;
+ else
+ args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_DCPLL;
+ } else
+ args.v4.acConfig.ucRefClkSource = pll_id;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v4.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v4.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v4.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp)
+ args.v4.acConfig.fCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v4.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 5:
+ args.v5.ucAction = action;
+ if (is_dp)
+ args.v5.usSymClock = cpu_to_le16(dp_clock / 10);
+ else
+ args.v5.usSymClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYB;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYA;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYD;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYC;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYF;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYE;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYG;
+ break;
+ }
+ if (is_dp)
+ args.v5.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v5.ucLaneNum = 8;
+ else
+ args.v5.ucLaneNum = 4;
+ args.v5.ucConnObjId = connector_object_id;
+ args.v5.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (is_dp && adev->clock.dp_extclk)
+ args.v5.asConfig.ucPhyClkSrcId = ENCODER_REFCLK_SRC_EXTCLK;
+ else
+ args.v5.asConfig.ucPhyClkSrcId = pll_id;
+
+ if (is_dp)
+ args.v5.asConfig.ucCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v5.asConfig.ucCoherentMode = 1;
+ }
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v5.asConfig.ucHPDSel = 0;
+ else
+ args.v5.asConfig.ucHPDSel = hpd_id + 1;
+ args.v5.ucDigEncoderSel = 1 << dig_encoder;
+ args.v5.ucDPLaneSet = lane_set;
+ break;
+ case 6:
+ args.v6.ucAction = action;
+ if (is_dp)
+ args.v6.ulSymClock = cpu_to_le32(dp_clock / 10);
+ else
+ args.v6.ulSymClock = cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYB;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYA;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYD;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYC;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYF;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYE;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYG;
+ break;
+ }
+ if (is_dp)
+ args.v6.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v6.ucLaneNum = 8;
+ else
+ args.v6.ucLaneNum = 4;
+ args.v6.ucConnObjId = connector_object_id;
+ if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH)
+ args.v6.ucDPLaneSet = lane_set;
+ else
+ args.v6.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v6.ucHPDSel = 0;
+ else
+ args.v6.ucHPDSel = hpd_id + 1;
+ args.v6.ucDigEncoderSel = 1 << dig_encoder;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+bool
+amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
+ int action)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_device *dev = amdgpu_connector->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union dig_transmitter_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+ uint8_t frev, crev;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+ goto done;
+
+ if ((action != ATOM_TRANSMITTER_ACTION_POWER_ON) &&
+ (action != ATOM_TRANSMITTER_ACTION_POWER_OFF))
+ goto done;
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ goto done;
+
+ memset(&args, 0, sizeof(args));
+
+ args.v1.ucAction = action;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ /* wait for the panel to power up */
+ if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
+ int i;
+
+ for (i = 0; i < 300; i++) {
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd))
+ return true;
+ mdelay(1);
+ }
+ return false;
+ }
+done:
+ return true;
+}
+
+union external_encoder_control {
+ EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION v1;
+ EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION_V3 v3;
+};
+
+static void
+amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
+ struct drm_encoder *ext_encoder,
+ int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder *ext_amdgpu_encoder = to_amdgpu_encoder(ext_encoder);
+ union external_encoder_control args;
+ struct drm_connector *connector;
+ int index = GetIndexIntoMasterTable(COMMAND, ExternalEncoderControl);
+ u8 frev, crev;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int connector_object_id = 0;
+ u32 ext_enum = (ext_amdgpu_encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+
+ if (action == EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ else
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ connector_object_id =
+ (amdgpu_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ /* no params on frev 1 */
+ break;
+ case 2:
+ switch (crev) {
+ case 1:
+ case 2:
+ args.v1.sDigEncoder.ucAction = action;
+ args.v1.sDigEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.v1.sDigEncoder.ucEncoderMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v1.sDigEncoder.ucEncoderMode)) {
+ if (dp_clock == 270000)
+ args.v1.sDigEncoder.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+ args.v1.sDigEncoder.ucLaneNum = dp_lane_count;
+ } else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.sDigEncoder.ucLaneNum = 8;
+ else
+ args.v1.sDigEncoder.ucLaneNum = 4;
+ break;
+ case 3:
+ args.v3.sExtEncoder.ucAction = action;
+ if (action == EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT)
+ args.v3.sExtEncoder.usConnectorId = cpu_to_le16(connector_object_id);
+ else
+ args.v3.sExtEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.v3.sExtEncoder.ucEncoderMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v3.sExtEncoder.ucEncoderMode)) {
+ if (dp_clock == 270000)
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_DPLINKRATE_2_70GHZ;
+ else if (dp_clock == 540000)
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_DPLINKRATE_5_40GHZ;
+ args.v3.sExtEncoder.ucLaneNum = dp_lane_count;
+ } else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.sExtEncoder.ucLaneNum = 8;
+ else
+ args.v3.sExtEncoder.ucLaneNum = 4;
+ switch (ext_enum) {
+ case GRAPH_OBJECT_ENUM_ID1:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER1;
+ break;
+ case GRAPH_OBJECT_ENUM_ID2:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER2;
+ break;
+ case GRAPH_OBJECT_ENUM_ID3:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER3;
+ break;
+ }
+ args.v3.sExtEncoder.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+static void
+amdgpu_atombios_encoder_setup_dig(struct drm_encoder *encoder, int action)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = NULL;
+
+ if (connector) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ amdgpu_dig_connector = amdgpu_connector->con_priv;
+ }
+
+ if (action == ATOM_ENABLE) {
+ if (!connector)
+ dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ else
+ dig->panel_mode = amdgpu_atombios_dp_get_panel_mode(encoder, connector);
+
+ /* setup and enable the encoder */
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder, ATOM_ENCODER_CMD_SETUP, 0);
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder,
+ ATOM_ENCODER_CMD_SETUP_PANEL_MODE,
+ dig->panel_mode);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ amdgpu_dig_connector->edp_on = true;
+ }
+ }
+ /* enable the transmitter */
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_ENABLE,
+ 0, 0);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ /* DP_SET_POWER_D0 is set in amdgpu_atombios_dp_link_train */
+ amdgpu_atombios_dp_link_train(encoder, connector);
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0);
+ }
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_set_backlight_level(amdgpu_encoder, dig->backlight_level);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder, ATOM_ENABLE);
+ } else {
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector)
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder,
+ ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder, ATOM_DISABLE);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector)
+ amdgpu_atombios_dp_set_rx_power_state(connector, DP_SET_POWER_D3);
+ /* disable the transmitter */
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ amdgpu_dig_connector->edp_on = false;
+ }
+ }
+ }
+}
+
+void
+amdgpu_atombios_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ DRM_DEBUG_KMS("encoder dpms %d to mode %d, devices %08x, active_devices %08x\n",
+ amdgpu_encoder->encoder_id, mode, amdgpu_encoder->devices,
+ amdgpu_encoder->active_device);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dig(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dig(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dvo(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dvo(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dac(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dac(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
+union crtc_source_param {
+ SELECT_CRTC_SOURCE_PS_ALLOCATION v1;
+ SELECT_CRTC_SOURCE_PARAMETERS_V2 v2;
+ SELECT_CRTC_SOURCE_PARAMETERS_V3 v3;
+};
+
+void
+amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ union crtc_source_param args;
+ int index = GetIndexIntoMasterTable(COMMAND, SelectCRTC_Source);
+ uint8_t frev, crev;
+ struct amdgpu_encoder_atom_dig *dig;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ default:
+ args.v1.ucCRTC = amdgpu_crtc->crtc_id;
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+ args.v1.ucDevice = ATOM_DEVICE_DFP1_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+ case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT)
+ args.v1.ucDevice = ATOM_DEVICE_LCD1_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_DFP3_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_DDI:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v1.ucDevice = ATOM_DEVICE_DFP2_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_CRT1_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_CRT2_INDEX;
+ break;
+ }
+ break;
+ case 2:
+ args.v2.ucCRTC = amdgpu_crtc->crtc_id;
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ else if (connector->connector_type == DRM_MODE_CONNECTOR_VGA)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT;
+ else
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ } else {
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ }
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ dig = amdgpu_encoder->enc_priv;
+ switch (dig->dig_encoder) {
+ case 0:
+ args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+ break;
+ case 1:
+ args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+ break;
+ case 2:
+ args.v2.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID;
+ break;
+ case 3:
+ args.v2.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID;
+ break;
+ case 4:
+ args.v2.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID;
+ break;
+ case 5:
+ args.v2.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID;
+ break;
+ case 6:
+ args.v2.ucEncoderID = ASIC_INT_DIG7_ENCODER_ID;
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v2.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v2.ucEncoderID = ASIC_INT_DAC1_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v2.ucEncoderID = ASIC_INT_DAC2_ENCODER_ID;
+ break;
+ }
+ break;
+ case 3:
+ args.v3.ucCRTC = amdgpu_crtc->crtc_id;
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ else if (connector->connector_type == DRM_MODE_CONNECTOR_VGA)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT;
+ else
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ } else {
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ }
+ args.v3.ucDstBpc = amdgpu_atombios_encoder_get_bpc(encoder);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ dig = amdgpu_encoder->enc_priv;
+ switch (dig->dig_encoder) {
+ case 0:
+ args.v3.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+ break;
+ case 1:
+ args.v3.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+ break;
+ case 2:
+ args.v3.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID;
+ break;
+ case 3:
+ args.v3.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID;
+ break;
+ case 4:
+ args.v3.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID;
+ break;
+ case 5:
+ args.v3.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID;
+ break;
+ case 6:
+ args.v3.ucEncoderID = ASIC_INT_DIG7_ENCODER_ID;
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v3.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v3.ucEncoderID = ASIC_INT_DAC1_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v3.ucEncoderID = ASIC_INT_DAC2_ENCODER_ID;
+ break;
+ }
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+/* This only needs to be called once at startup */
+void
+amdgpu_atombios_encoder_init_dig(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder, ATOM_TRANSMITTER_ACTION_INIT,
+ 0, 0);
+ break;
+ }
+
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT);
+ }
+}
+
+static bool
+amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT |
+ ATOM_DEVICE_CV_SUPPORT |
+ ATOM_DEVICE_CRT_SUPPORT)) {
+ DAC_LOAD_DETECTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, DAC_LoadDetection);
+ uint8_t frev, crev;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return false;
+
+ args.sDacload.ucMisc = 0;
+
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1))
+ args.sDacload.ucDacType = ATOM_DAC_A;
+ else
+ args.sDacload.ucDacType = ATOM_DAC_B;
+
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT1_SUPPORT);
+ else if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT2_SUPPORT);
+ else if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CV_SUPPORT);
+ if (crev >= 3)
+ args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+ } else if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_TV1_SUPPORT);
+ if (crev >= 3)
+ args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ return true;
+ } else
+ return false;
+}
+
+enum drm_connector_status
+amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ uint32_t bios_0_scratch;
+
+ if (!amdgpu_atombios_encoder_dac_load_detect(encoder, connector)) {
+ DRM_DEBUG_KMS("detect returned false \n");
+ return connector_status_unknown;
+ }
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+
+ DRM_DEBUG_KMS("Bios 0 scratch %x %08x\n", bios_0_scratch, amdgpu_encoder->devices);
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT1_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT2_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A))
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A))
+ return connector_status_connected; /* CTV */
+ else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A))
+ return connector_status_connected; /* STV */
+ }
+ return connector_status_disconnected;
+}
+
+enum drm_connector_status
+amdgpu_atombios_encoder_dig_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+ u32 bios_0_scratch;
+
+ if (!ext_encoder)
+ return connector_status_unknown;
+
+ if ((amdgpu_connector->devices & ATOM_DEVICE_CRT_SUPPORT) == 0)
+ return connector_status_unknown;
+
+ /* load detect on the dp bridge */
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_DACLOAD_DETECTION);
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+
+ DRM_DEBUG_KMS("Bios 0 scratch %x %08x\n", bios_0_scratch, amdgpu_encoder->devices);
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT1_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT2_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A))
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A))
+ return connector_status_connected; /* CTV */
+ else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A))
+ return connector_status_connected; /* STV */
+ }
+ return connector_status_disconnected;
+}
+
+void
+amdgpu_atombios_encoder_setup_ext_encoder_ddc(struct drm_encoder *encoder)
+{
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+
+ if (ext_encoder)
+ /* ddc_setup on the dp bridge */
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_DDC_SETUP);
+
+}
+
+void
+amdgpu_atombios_encoder_set_bios_scratch_regs(struct drm_connector *connector,
+ struct drm_encoder *encoder,
+ bool connected)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ uint32_t bios_0_scratch, bios_3_scratch, bios_6_scratch;
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+ bios_3_scratch = RREG32(mmBIOS_SCRATCH_3);
+ bios_6_scratch = RREG32(mmBIOS_SCRATCH_6);
+
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_LCD1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("LCD1 connected\n");
+ bios_0_scratch |= ATOM_S0_LCD1;
+ bios_3_scratch |= ATOM_S3_LCD1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_LCD1;
+ } else {
+ DRM_DEBUG_KMS("LCD1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_LCD1;
+ bios_3_scratch &= ~ATOM_S3_LCD1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_LCD1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("CRT1 connected\n");
+ bios_0_scratch |= ATOM_S0_CRT1_COLOR;
+ bios_3_scratch |= ATOM_S3_CRT1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_CRT1;
+ } else {
+ DRM_DEBUG_KMS("CRT1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_CRT1_MASK;
+ bios_3_scratch &= ~ATOM_S3_CRT1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("CRT2 connected\n");
+ bios_0_scratch |= ATOM_S0_CRT2_COLOR;
+ bios_3_scratch |= ATOM_S3_CRT2_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_CRT2;
+ } else {
+ DRM_DEBUG_KMS("CRT2 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_CRT2_MASK;
+ bios_3_scratch &= ~ATOM_S3_CRT2_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT2;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP1 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP1;
+ bios_3_scratch |= ATOM_S3_DFP1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP1;
+ } else {
+ DRM_DEBUG_KMS("DFP1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP1;
+ bios_3_scratch &= ~ATOM_S3_DFP1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP2_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP2 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP2;
+ bios_3_scratch |= ATOM_S3_DFP2_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP2;
+ } else {
+ DRM_DEBUG_KMS("DFP2 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP2;
+ bios_3_scratch &= ~ATOM_S3_DFP2_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP2;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP3_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP3_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP3 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP3;
+ bios_3_scratch |= ATOM_S3_DFP3_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP3;
+ } else {
+ DRM_DEBUG_KMS("DFP3 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP3;
+ bios_3_scratch &= ~ATOM_S3_DFP3_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP3;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP4_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP4_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP4 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP4;
+ bios_3_scratch |= ATOM_S3_DFP4_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP4;
+ } else {
+ DRM_DEBUG_KMS("DFP4 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP4;
+ bios_3_scratch &= ~ATOM_S3_DFP4_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP4;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP5_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP5_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP5 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP5;
+ bios_3_scratch |= ATOM_S3_DFP5_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP5;
+ } else {
+ DRM_DEBUG_KMS("DFP5 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP5;
+ bios_3_scratch &= ~ATOM_S3_DFP5_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP5;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP6_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP6_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP6 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP6;
+ bios_3_scratch |= ATOM_S3_DFP6_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP6;
+ } else {
+ DRM_DEBUG_KMS("DFP6 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP6;
+ bios_3_scratch &= ~ATOM_S3_DFP6_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP6;
+ }
+ }
+
+ WREG32(mmBIOS_SCRATCH_0, bios_0_scratch);
+ WREG32(mmBIOS_SCRATCH_3, bios_3_scratch);
+ WREG32(mmBIOS_SCRATCH_6, bios_6_scratch);
+}
+
+union lvds_info {
+ struct _ATOM_LVDS_INFO info;
+ struct _ATOM_LVDS_INFO_V12 info_12;
+};
+
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
+{
+ struct drm_device *dev = encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, LVDS_Info);
+ uint16_t data_offset, misc;
+ union lvds_info *lvds_info;
+ uint8_t frev, crev;
+ struct amdgpu_encoder_atom_dig *lvds = NULL;
+ int encoder_enum = (encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ lvds_info =
+ (union lvds_info *)(mode_info->atom_context->bios + data_offset);
+ lvds =
+ kzalloc(sizeof(struct amdgpu_encoder_atom_dig), GFP_KERNEL);
+
+ if (!lvds)
+ return NULL;
+
+ lvds->native_mode.clock =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usPixClk) * 10;
+ lvds->native_mode.hdisplay =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHActive);
+ lvds->native_mode.vdisplay =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVActive);
+ lvds->native_mode.htotal = lvds->native_mode.hdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHBlanking_Time);
+ lvds->native_mode.hsync_start = lvds->native_mode.hdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncOffset);
+ lvds->native_mode.hsync_end = lvds->native_mode.hsync_start +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncWidth);
+ lvds->native_mode.vtotal = lvds->native_mode.vdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVBlanking_Time);
+ lvds->native_mode.vsync_start = lvds->native_mode.vdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncOffset);
+ lvds->native_mode.vsync_end = lvds->native_mode.vsync_start +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncWidth);
+ lvds->panel_pwr_delay =
+ le16_to_cpu(lvds_info->info.usOffDelayInMs);
+ lvds->lcd_misc = lvds_info->info.ucLVDS_Misc;
+
+ misc = le16_to_cpu(lvds_info->info.sLCDTiming.susModeMiscInfo.usAccess);
+ if (misc & ATOM_VSYNC_POLARITY)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_NVSYNC;
+ if (misc & ATOM_HSYNC_POLARITY)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_NHSYNC;
+ if (misc & ATOM_COMPOSITESYNC)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_CSYNC;
+ if (misc & ATOM_INTERLACE)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_INTERLACE;
+ if (misc & ATOM_DOUBLE_CLOCK_MODE)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_DBLSCAN;
+
+ lvds->native_mode.width_mm = le16_to_cpu(lvds_info->info.sLCDTiming.usImageHSize);
+ lvds->native_mode.height_mm = le16_to_cpu(lvds_info->info.sLCDTiming.usImageVSize);
+
+ /* set crtc values */
+ drm_mode_set_crtcinfo(&lvds->native_mode, CRTC_INTERLACE_HALVE_V);
+
+ lvds->lcd_ss_id = lvds_info->info.ucSS_Id;
+
+ encoder->native_mode = lvds->native_mode;
+
+ if (encoder_enum == 2)
+ lvds->linkb = true;
+ else
+ lvds->linkb = false;
+
+ /* parse the lcd record table */
+ if (le16_to_cpu(lvds_info->info.usModePatchTableOffset)) {
+ ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
+ ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
+ bool bad_record = false;
+ u8 *record;
+
+ if ((frev == 1) && (crev < 2))
+ /* absolute */
+ record = (u8 *)(mode_info->atom_context->bios +
+ le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+ else
+ /* relative */
+ record = (u8 *)(mode_info->atom_context->bios +
+ data_offset +
+ le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+ while (*record != ATOM_RECORD_END_TYPE) {
+ switch (*record) {
+ case LCD_MODE_PATCH_RECORD_MODE_TYPE:
+ record += sizeof(ATOM_PATCH_RECORD_MODE);
+ break;
+ case LCD_RTS_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_RTS_RECORD);
+ break;
+ case LCD_CAP_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_MODE_CONTROL_CAP);
+ break;
+ case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
+ fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
+ if (fake_edid_record->ucFakeEDIDLength) {
+ struct edid *edid;
+ int edid_size =
+ max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength);
+ edid = kmalloc(edid_size, GFP_KERNEL);
+ if (edid) {
+ memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0],
+ fake_edid_record->ucFakeEDIDLength);
+
+ if (drm_edid_is_valid(edid)) {
+ adev->mode_info.bios_hardcoded_edid = edid;
+ adev->mode_info.bios_hardcoded_edid_size = edid_size;
+ } else
+ kfree(edid);
+ }
+ }
+ record += fake_edid_record->ucFakeEDIDLength ?
+ struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ fake_edid_record->ucFakeEDIDLength) :
+ /* empty fake edid record must be 3 bytes long */
+ sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
+ break;
+ case LCD_PANEL_RESOLUTION_RECORD_TYPE:
+ panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
+ lvds->native_mode.width_mm = panel_res_record->usHSize;
+ lvds->native_mode.height_mm = panel_res_record->usVSize;
+ record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD);
+ break;
+ default:
+ DRM_ERROR("Bad LCD record %d\n", *record);
+ bad_record = true;
+ break;
+ }
+ if (bad_record)
+ break;
+ }
+ }
+ }
+ return lvds;
+}
+
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_dig_info(struct amdgpu_encoder *amdgpu_encoder)
+{
+ int encoder_enum = (amdgpu_encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+ struct amdgpu_encoder_atom_dig *dig = kzalloc(sizeof(struct amdgpu_encoder_atom_dig), GFP_KERNEL);
+
+ if (!dig)
+ return NULL;
+
+ /* coherent mode by default */
+ dig->coherent_mode = true;
+ dig->dig_encoder = -1;
+
+ if (encoder_enum == 2)
+ dig->linkb = true;
+ else
+ dig->linkb = false;
+
+ return dig;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
new file mode 100644
index 000000000..f77cbdef6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_ENCODER_H__
+#define __ATOMBIOS_ENCODER_H__
+
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level);
+u8
+amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
+void
+amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level);
+void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encoder,
+ struct drm_connector *drm_connector);
+void
+amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder);
+bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder);
+bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
+ int action, int panel_mode);
+void
+amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action,
+ uint8_t lane_num, uint8_t lane_set);
+bool
+amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
+ int action);
+void
+amdgpu_atombios_encoder_dpms(struct drm_encoder *encoder, int mode);
+void
+amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_init_dig(struct amdgpu_device *adev);
+enum drm_connector_status
+amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+enum drm_connector_status
+amdgpu_atombios_encoder_dig_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+void
+amdgpu_atombios_encoder_setup_ext_encoder_ddc(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_set_bios_scratch_regs(struct drm_connector *connector,
+ struct drm_encoder *encoder,
+ bool connected);
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder);
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_dig_info(struct amdgpu_encoder *amdgpu_encoder);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
new file mode 100644
index 000000000..af0335535
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
+
+#define TARGET_HW_I2C_CLOCK 50
+
+/* these are a limitation of ProcessI2cChannelTransaction not the hw */
+#define ATOM_MAX_HW_I2C_WRITE 3
+#define ATOM_MAX_HW_I2C_READ 255
+
+static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
+ u8 slave_addr, u8 flags,
+ u8 *buf, u8 num)
+{
+ struct drm_device *dev = chan->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+ unsigned char *base;
+ u16 out = cpu_to_le16(0);
+ int r = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ mutex_lock(&chan->mutex);
+
+ base = (unsigned char *)adev->mode_info.atom_context->scratch;
+
+ if (flags & HW_I2C_WRITE) {
+ if (num > ATOM_MAX_HW_I2C_WRITE) {
+ DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 3)\n", num);
+ r = -EINVAL;
+ goto done;
+ }
+ if (buf == NULL)
+ args.ucRegIndex = 0;
+ else
+ args.ucRegIndex = buf[0];
+ if (num)
+ num--;
+ if (num) {
+ if (buf) {
+ memcpy(&out, &buf[1], num);
+ } else {
+ DRM_ERROR("hw i2c: missing buf with num > 1\n");
+ r = -EINVAL;
+ goto done;
+ }
+ }
+ args.lpI2CDataOut = cpu_to_le16(out);
+ } else {
+ args.ucRegIndex = 0;
+ args.lpI2CDataOut = 0;
+ }
+
+ args.ucFlag = flags;
+ args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+ args.ucTransBytes = num;
+ args.ucSlaveAddr = slave_addr << 1;
+ args.ucLineNumber = chan->rec.i2c_id;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ /* error */
+ if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("hw_i2c error\n");
+ r = -EIO;
+ goto done;
+ }
+
+ if (!(flags & HW_I2C_WRITE))
+ amdgpu_atombios_copy_swap(buf, base, num, false);
+
+done:
+ mutex_unlock(&chan->mutex);
+
+ return r;
+}
+
+int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msgs, int num)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct i2c_msg *p;
+ int i, remaining, current_count, buffer_offset, max_bytes, ret;
+ u8 flags;
+
+ /* check for bus probe */
+ p = &msgs[0];
+ if ((num == 1) && (p->len == 0)) {
+ ret = amdgpu_atombios_i2c_process_i2c_ch(i2c,
+ p->addr, HW_I2C_WRITE,
+ NULL, 0);
+ if (ret)
+ return ret;
+ else
+ return num;
+ }
+
+ for (i = 0; i < num; i++) {
+ p = &msgs[i];
+ remaining = p->len;
+ buffer_offset = 0;
+ /* max_bytes are a limitation of ProcessI2cChannelTransaction not the hw */
+ if (p->flags & I2C_M_RD) {
+ max_bytes = ATOM_MAX_HW_I2C_READ;
+ flags = HW_I2C_READ;
+ } else {
+ max_bytes = ATOM_MAX_HW_I2C_WRITE;
+ flags = HW_I2C_WRITE;
+ }
+ while (remaining) {
+ if (remaining > max_bytes)
+ current_count = max_bytes;
+ else
+ current_count = remaining;
+ ret = amdgpu_atombios_i2c_process_i2c_ch(i2c,
+ p->addr, flags,
+ &p->buf[buffer_offset], current_count);
+ if (ret)
+ return ret;
+ remaining -= current_count;
+ buffer_offset += current_count;
+ }
+ }
+
+ return num;
+}
+
+u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)
+{
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)
+{
+ PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+
+ args.ucRegIndex = offset;
+ args.lpI2CDataOut = data;
+ args.ucFlag = 1;
+ args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+ args.ucTransBytes = 1;
+ args.ucSlaveAddr = slave_addr;
+ args.ucLineNumber = line_number;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
new file mode 100644
index 000000000..251aaf41f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_I2C_H__
+#define __ATOMBIOS_I2C_H__
+
+int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msgs, int num);
+u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap);
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev,
+ u8 slave_addr, u8 line_number, u8 offset, u8 data);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
new file mode 100644
index 000000000..e63abdf52
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -0,0 +1,2321 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "cikd.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "cik.h"
+#include "gmc_v7_0.h"
+#include "cik_ih.h"
+#include "dce_v8_0.h"
+#include "gfx_v7_0.h"
+#include "cik_sdma.h"
+#include "uvd_v4_2.h"
+#include "vce_v2_0.h"
+#include "cik_dpm.h"
+
+#include "uvd/uvd_4_2_d.h"
+
+#include "smu/smu_7_0_1_d.h"
+#include "smu/smu_7_0_1_sh_mask.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#include "amdgpu_dm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_vkms.h"
+
+static const struct amdgpu_video_codec_info cik_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_encode_array),
+ .codec_array = cik_video_codecs_encode_array,
+};
+
+static const struct amdgpu_video_codec_info cik_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 41,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_decode_array),
+ .codec_array = cik_video_codecs_decode_array,
+};
+
+static int cik_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ if (encode)
+ *codecs = &cik_video_codecs_encode;
+ else
+ *codecs = &cik_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Indirect registers accessor
+ */
+static u32 cik_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(mmPCIE_INDEX, reg);
+ (void)RREG32(mmPCIE_INDEX);
+ r = RREG32(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void cik_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(mmPCIE_INDEX, reg);
+ (void)RREG32(mmPCIE_INDEX);
+ WREG32(mmPCIE_DATA, v);
+ (void)RREG32(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void cik_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
+static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(mmUVD_CTX_DATA);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void cik_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(mmUVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
+static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ r = RREG32(mmDIDT_IND_DATA);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void cik_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ WREG32(mmDIDT_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static const u32 bonaire_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 bonaire_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 bonaire_golden_registers[] =
+{
+ 0xcd5, 0x00000333, 0x00000333,
+ 0xcd4, 0x000c0fc0, 0x00040200,
+ 0x2684, 0x00010000, 0x00058208,
+ 0xf000, 0xffff1fff, 0x00140000,
+ 0xf080, 0xfdfc0fff, 0x00000100,
+ 0xf08d, 0x40000000, 0x40000200,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0x0002021c, 0x00020200,
+ 0x31e, 0x00000080, 0x00000000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0xd43, 0x00810000, 0x408af000,
+ 0x1c0c, 0x31000111, 0x00000011,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x883, 0x00007fb6, 0x0021a1b1,
+ 0x884, 0x00007fb6, 0x002021b1,
+ 0x860, 0x00007fb6, 0x00002191,
+ 0x886, 0x00007fb6, 0x002121b1,
+ 0x887, 0x00007fb6, 0x002021b1,
+ 0x877, 0x00007fb6, 0x00002191,
+ 0x878, 0x00007fb6, 0x00002191,
+ 0xd8a, 0x0000003f, 0x0000000a,
+ 0xd8b, 0x0000003f, 0x0000000a,
+ 0xab9, 0x00073ffe, 0x000022a2,
+ 0x903, 0x000007ff, 0x00000000,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22fc, 0x00002001, 0x00000001,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x2440, 0x03000000, 0x0362c688,
+ 0x2300, 0x000000ff, 0x00000001,
+ 0x390, 0x00001fff, 0x00001fff,
+ 0x2418, 0x0000007f, 0x00000020,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b05, 0x000003ff, 0x000000f3,
+ 0x2b03, 0xffffffff, 0x00001032
+};
+
+static const u32 bonaire_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0xc0000100,
+ 0xf0b2, 0xffffffff, 0xc0000100,
+ 0xf0b1, 0xffffffff, 0xc0000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 spectre_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 spectre_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 spectre_golden_registers[] =
+{
+ 0xf000, 0xffff1fff, 0x96940200,
+ 0xf003, 0xffff0001, 0xff000000,
+ 0xf080, 0xfffc0fff, 0x00000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0xfffffffc, 0x00020200,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x26df, 0x00ff0000, 0x00fc0000,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xa0d4, 0x3f3f3fff, 0x00000082,
+ 0xa0d5, 0x0000003f, 0x00000000,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x244f, 0xffff03df, 0x00000004,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x2300, 0x000008ff, 0x00000800,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b03, 0xffffffff, 0x54763210,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0xc24d, 0xffffffff, 0x00000001
+};
+
+static const u32 spectre_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf02b, 0xffffffff, 0x00010000,
+ 0xf02c, 0xffffffff, 0x00030002,
+ 0xf02d, 0xffffffff, 0x00040007,
+ 0xf02e, 0xffffffff, 0x00060005,
+ 0xf02f, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 kalindi_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 kalindi_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 kalindi_golden_registers[] =
+{
+ 0xf000, 0xffffdfff, 0x6e944040,
+ 0x1579, 0xff607fff, 0xfc000100,
+ 0xf088, 0xff000fff, 0x00000100,
+ 0xf089, 0xff000fff, 0x00000100,
+ 0xf080, 0xfffc0fff, 0x00000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x263f, 0xffffffff, 0x00000010,
+ 0x26df, 0x00ff0000, 0x00fc0000,
+ 0x200c, 0x00001f0f, 0x0000100a,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x902, 0x000fffff, 0x000c007f,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0x3fff3fff, 0x00ffcfff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x2300, 0x000000ff, 0x00000003,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0x2231, 0x001f3ae3, 0x00000082,
+ 0x2235, 0x0000001f, 0x00000010,
+ 0xc24d, 0xffffffff, 0x00000000
+};
+
+static const u32 kalindi_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 hawaii_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 hawaii_golden_common_registers[] =
+{
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xa0d4, 0xffffffff, 0x3a00161a,
+ 0xa0d5, 0xffffffff, 0x0000002e,
+ 0x2684, 0xffffffff, 0x00018208,
+ 0x263e, 0xffffffff, 0x12011003
+};
+
+static const u32 hawaii_golden_registers[] =
+{
+ 0xcd5, 0x00000333, 0x00000333,
+ 0x2684, 0x00010000, 0x00058208,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0x0002021c, 0x00020200,
+ 0x31e, 0x00000080, 0x00000000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0xd43, 0x00810000, 0x408af000,
+ 0x1c0c, 0x31000111, 0x00000011,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x848, 0x0000007f, 0x0000001b,
+ 0x877, 0x00007fb6, 0x00002191,
+ 0xd8a, 0x0000003f, 0x0000000a,
+ 0xd8b, 0x0000003f, 0x0000000a,
+ 0xab9, 0x00073ffe, 0x000022a2,
+ 0x903, 0x000007ff, 0x00000000,
+ 0x22fc, 0x00002001, 0x00000001,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x31dc, 0x00000f00, 0x00000800,
+ 0x31dd, 0x00000f00, 0x00000800,
+ 0x31e6, 0x00ffffff, 0x00ff7fbf,
+ 0x31e7, 0x00ffffff, 0x00ff7faf,
+ 0x2300, 0x000000ff, 0x00000800,
+ 0x390, 0x00001fff, 0x00001fff,
+ 0x2418, 0x0000007f, 0x00000020,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b80, 0x00100000, 0x000ff07c,
+ 0x2b05, 0x000003ff, 0x0000000f,
+ 0x2b04, 0xffffffff, 0x7564fdec,
+ 0x2b03, 0xffffffff, 0x3120b9a8,
+ 0x2b02, 0x20000000, 0x0f9c0000
+};
+
+static const u32 hawaii_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffd,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00200100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf02b, 0xffffffff, 0x00010000,
+ 0xf02c, 0xffffffff, 0x00030002,
+ 0xf02d, 0xffffffff, 0x00040007,
+ 0xf02e, 0xffffffff, 0x00060005,
+ 0xf02f, 0xffffffff, 0x00090008,
+ 0xf030, 0xffffffff, 0x00010000,
+ 0xf031, 0xffffffff, 0x00030002,
+ 0xf032, 0xffffffff, 0x00040007,
+ 0xf033, 0xffffffff, 0x00060005,
+ 0xf034, 0xffffffff, 0x00090008,
+ 0xf035, 0xffffffff, 0x00010000,
+ 0xf036, 0xffffffff, 0x00030002,
+ 0xf037, 0xffffffff, 0x00040007,
+ 0xf038, 0xffffffff, 0x00060005,
+ 0xf039, 0xffffffff, 0x00090008,
+ 0xf03a, 0xffffffff, 0x00010000,
+ 0xf03b, 0xffffffff, 0x00030002,
+ 0xf03c, 0xffffffff, 0x00040007,
+ 0xf03d, 0xffffffff, 0x00060005,
+ 0xf03e, 0xffffffff, 0x00090008,
+ 0x30c6, 0xffffffff, 0x00020200,
+ 0xcd4, 0xffffffff, 0x00000200,
+ 0x570, 0xffffffff, 0x00000400,
+ 0x157a, 0xffffffff, 0x00000000,
+ 0xbd4, 0xffffffff, 0x00000902,
+ 0xf000, 0xffffffff, 0x96940200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 godavari_golden_registers[] =
+{
+ 0x1579, 0xff607fff, 0xfc000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260c0, 0xf00fffff, 0x00000400,
+ 0x184c, 0xffffffff, 0x00010000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x263f, 0xffffffff, 0x00000010,
+ 0x200c, 0x00001f0f, 0x0000100a,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x902, 0x000fffff, 0x000c007f,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0xffffffff, 0x00ff0fff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0x3405, 0x00010000, 0x00810001,
+ 0x3605, 0x00010000, 0x00810001,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x31dc, 0x00000f00, 0x00000800,
+ 0x31dd, 0x00000f00, 0x00000800,
+ 0x31e6, 0x00ffffff, 0x00ff7fbf,
+ 0x31e7, 0x00ffffff, 0x00ff7faf,
+ 0x2300, 0x000000ff, 0x00000001,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0x2231, 0x001f3ae3, 0x00000082,
+ 0x2235, 0x0000001f, 0x00000010,
+ 0xc24d, 0xffffffff, 0x00000000
+};
+
+static void cik_init_golden_registers(struct amdgpu_device *adev)
+{
+ /* Some of the registers might be dependent on GRBM_GFX_INDEX */
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_mgcg_cgcg_init,
+ ARRAY_SIZE(bonaire_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_registers,
+ ARRAY_SIZE(bonaire_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_common_registers,
+ ARRAY_SIZE(bonaire_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_spm_registers,
+ ARRAY_SIZE(bonaire_golden_spm_registers));
+ break;
+ case CHIP_KABINI:
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_mgcg_cgcg_init,
+ ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_registers,
+ ARRAY_SIZE(kalindi_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_common_registers,
+ ARRAY_SIZE(kalindi_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_spm_registers,
+ ARRAY_SIZE(kalindi_golden_spm_registers));
+ break;
+ case CHIP_MULLINS:
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_mgcg_cgcg_init,
+ ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ godavari_golden_registers,
+ ARRAY_SIZE(godavari_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_common_registers,
+ ARRAY_SIZE(kalindi_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_spm_registers,
+ ARRAY_SIZE(kalindi_golden_spm_registers));
+ break;
+ case CHIP_KAVERI:
+ amdgpu_device_program_register_sequence(adev,
+ spectre_mgcg_cgcg_init,
+ ARRAY_SIZE(spectre_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_registers,
+ ARRAY_SIZE(spectre_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_common_registers,
+ ARRAY_SIZE(spectre_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_spm_registers,
+ ARRAY_SIZE(spectre_golden_spm_registers));
+ break;
+ case CHIP_HAWAII:
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_mgcg_cgcg_init,
+ ARRAY_SIZE(hawaii_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_registers,
+ ARRAY_SIZE(hawaii_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_common_registers,
+ ARRAY_SIZE(hawaii_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_spm_registers,
+ ARRAY_SIZE(hawaii_golden_spm_registers));
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+/**
+ * cik_get_xclk - get the xclk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the reference clock used by the gfx engine
+ * (CIK).
+ */
+static u32 cik_get_xclk(struct amdgpu_device *adev)
+{
+ u32 reference_clock = adev->clock.spll.reference_freq;
+
+ if (adev->flags & AMD_IS_APU) {
+ if (RREG32_SMC(ixGENERAL_PWRMGT) & GENERAL_PWRMGT__GPU_COUNTER_CLK_MASK)
+ return reference_clock / 2;
+ } else {
+ if (RREG32_SMC(ixCG_CLKPIN_CNTL) & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
+ return reference_clock / 4;
+ }
+ return reference_clock;
+}
+
+/**
+ * cik_srbm_select - select specific register instances
+ *
+ * @adev: amdgpu_device pointer
+ * @me: selected ME (micro engine)
+ * @pipe: pipe
+ * @queue: queue
+ * @vmid: VMID
+ *
+ * Switches the currently active registers instances. Some
+ * registers are instanced per VMID, others are instanced per
+ * me/pipe/queue combination.
+ */
+void cik_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 srbm_gfx_cntl =
+ (((pipe << SRBM_GFX_CNTL__PIPEID__SHIFT) & SRBM_GFX_CNTL__PIPEID_MASK)|
+ ((me << SRBM_GFX_CNTL__MEID__SHIFT) & SRBM_GFX_CNTL__MEID_MASK)|
+ ((vmid << SRBM_GFX_CNTL__VMID__SHIFT) & SRBM_GFX_CNTL__VMID_MASK)|
+ ((queue << SRBM_GFX_CNTL__QUEUEID__SHIFT) & SRBM_GFX_CNTL__QUEUEID_MASK));
+ WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl);
+}
+
+static void cik_vga_set_state(struct amdgpu_device *adev, bool state)
+{
+ uint32_t tmp;
+
+ tmp = RREG32(mmCONFIG_CNTL);
+ if (!state)
+ tmp |= CONFIG_CNTL__VGA_DIS_MASK;
+ else
+ tmp &= ~CONFIG_CNTL__VGA_DIS_MASK;
+ WREG32(mmCONFIG_CNTL, tmp);
+}
+
+static bool cik_read_disabled_bios(struct amdgpu_device *adev)
+{
+ u32 bus_cntl;
+ u32 d1vga_control = 0;
+ u32 d2vga_control = 0;
+ u32 vga_render_control = 0;
+ u32 rom_cntl;
+ bool r;
+
+ bus_cntl = RREG32(mmBUS_CNTL);
+ if (adev->mode_info.num_crtc) {
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
+ }
+ rom_cntl = RREG32_SMC(ixROM_CNTL);
+
+ /* enable the rom */
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
+ if (adev->mode_info.num_crtc) {
+ /* Disable VGA mode */
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl | ROM_CNTL__SCK_OVERWRITE_MASK);
+
+ r = amdgpu_read_bios(adev);
+
+ /* restore regs */
+ WREG32(mmBUS_CNTL, bus_cntl);
+ if (adev->mode_info.num_crtc) {
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl);
+ return r;
+}
+
+static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ unsigned long flags;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+ /* take the smc lock since we are using the smc index */
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ /* set rom index to 0 */
+ WREG32(mmSMC_IND_INDEX_0, ixROM_INDEX);
+ WREG32(mmSMC_IND_DATA_0, 0);
+ /* set index to data for continous read */
+ WREG32(mmSMC_IND_INDEX_0, ixROM_DATA);
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(mmSMC_IND_DATA_0);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+
+ return true;
+}
+
+static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
+ {mmGRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmGRBM_STATUS_SE2},
+ {mmGRBM_STATUS_SE3},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+ {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {mmCP_CPF_BUSY_STAT},
+ {mmCP_CPF_STALLED_STAT1},
+ {mmCP_CPF_STATUS},
+ {mmCP_CPC_BUSY_STAT},
+ {mmCP_CPC_STALLED_STAT1},
+ {mmCP_CPC_STATUS},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmGB_MACROTILE_MODE0},
+ {mmGB_MACROTILE_MODE1},
+ {mmGB_MACROTILE_MODE2},
+ {mmGB_MACROTILE_MODE3},
+ {mmGB_MACROTILE_MODE4},
+ {mmGB_MACROTILE_MODE5},
+ {mmGB_MACROTILE_MODE6},
+ {mmGB_MACROTILE_MODE7},
+ {mmGB_MACROTILE_MODE8},
+ {mmGB_MACROTILE_MODE9},
+ {mmGB_MACROTILE_MODE10},
+ {mmGB_MACROTILE_MODE11},
+ {mmGB_MACROTILE_MODE12},
+ {mmGB_MACROTILE_MODE13},
+ {mmGB_MACROTILE_MODE14},
+ {mmGB_MACROTILE_MODE15},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmGB_BACKEND_MAP, false},
+ {mmPA_SC_RASTER_CONFIG, true},
+ {mmPA_SC_RASTER_CONFIG_1, true},
+};
+
+
+static uint32_t cik_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ uint32_t val;
+ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
+ unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
+
+ switch (reg_offset) {
+ case mmCC_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
+ case mmGC_USER_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
+ case mmPA_SC_RASTER_CONFIG:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
+ case mmPA_SC_RASTER_CONFIG_1:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+ } else {
+ unsigned idx;
+
+ switch (reg_offset) {
+ case mmGB_ADDR_CONFIG:
+ return adev->gfx.config.gb_addr_config;
+ case mmMC_ARB_RAMCFG:
+ return adev->gfx.config.mc_arb_ramcfg;
+ case mmGB_TILE_MODE0:
+ case mmGB_TILE_MODE1:
+ case mmGB_TILE_MODE2:
+ case mmGB_TILE_MODE3:
+ case mmGB_TILE_MODE4:
+ case mmGB_TILE_MODE5:
+ case mmGB_TILE_MODE6:
+ case mmGB_TILE_MODE7:
+ case mmGB_TILE_MODE8:
+ case mmGB_TILE_MODE9:
+ case mmGB_TILE_MODE10:
+ case mmGB_TILE_MODE11:
+ case mmGB_TILE_MODE12:
+ case mmGB_TILE_MODE13:
+ case mmGB_TILE_MODE14:
+ case mmGB_TILE_MODE15:
+ case mmGB_TILE_MODE16:
+ case mmGB_TILE_MODE17:
+ case mmGB_TILE_MODE18:
+ case mmGB_TILE_MODE19:
+ case mmGB_TILE_MODE20:
+ case mmGB_TILE_MODE21:
+ case mmGB_TILE_MODE22:
+ case mmGB_TILE_MODE23:
+ case mmGB_TILE_MODE24:
+ case mmGB_TILE_MODE25:
+ case mmGB_TILE_MODE26:
+ case mmGB_TILE_MODE27:
+ case mmGB_TILE_MODE28:
+ case mmGB_TILE_MODE29:
+ case mmGB_TILE_MODE30:
+ case mmGB_TILE_MODE31:
+ idx = (reg_offset - mmGB_TILE_MODE0);
+ return adev->gfx.config.tile_mode_array[idx];
+ case mmGB_MACROTILE_MODE0:
+ case mmGB_MACROTILE_MODE1:
+ case mmGB_MACROTILE_MODE2:
+ case mmGB_MACROTILE_MODE3:
+ case mmGB_MACROTILE_MODE4:
+ case mmGB_MACROTILE_MODE5:
+ case mmGB_MACROTILE_MODE6:
+ case mmGB_MACROTILE_MODE7:
+ case mmGB_MACROTILE_MODE8:
+ case mmGB_MACROTILE_MODE9:
+ case mmGB_MACROTILE_MODE10:
+ case mmGB_MACROTILE_MODE11:
+ case mmGB_MACROTILE_MODE12:
+ case mmGB_MACROTILE_MODE13:
+ case mmGB_MACROTILE_MODE14:
+ case mmGB_MACROTILE_MODE15:
+ idx = (reg_offset - mmGB_MACROTILE_MODE0);
+ return adev->gfx.config.macrotile_mode_array[idx];
+ default:
+ return RREG32(reg_offset);
+ }
+ }
+}
+
+static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(cik_allowed_read_registers); i++) {
+ bool indexed = cik_allowed_read_registers[i].grbm_indexed;
+
+ if (reg_offset != cik_allowed_read_registers[i].reg_offset)
+ continue;
+
+ *value = cik_get_register_value(adev, indexed, se_num, sh_num,
+ reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+struct kv_reset_save_regs {
+ u32 gmcon_reng_execute;
+ u32 gmcon_misc;
+ u32 gmcon_misc3;
+};
+
+static void kv_save_regs_for_reset(struct amdgpu_device *adev,
+ struct kv_reset_save_regs *save)
+{
+ save->gmcon_reng_execute = RREG32(mmGMCON_RENG_EXECUTE);
+ save->gmcon_misc = RREG32(mmGMCON_MISC);
+ save->gmcon_misc3 = RREG32(mmGMCON_MISC3);
+
+ WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute &
+ ~GMCON_RENG_EXECUTE__RENG_EXECUTE_ON_PWR_UP_MASK);
+ WREG32(mmGMCON_MISC, save->gmcon_misc &
+ ~(GMCON_MISC__RENG_EXECUTE_ON_REG_UPDATE_MASK |
+ GMCON_MISC__STCTRL_STUTTER_EN_MASK));
+}
+
+static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
+ struct kv_reset_save_regs *save)
+{
+ int i;
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x200010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x300010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x210000);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xa00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x21003);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xb00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x2b00);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xc00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xd00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x420000);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x100010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x120202);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x500010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x3e3e36);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x600010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x373f3e);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x700010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x3e1332);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xe00010ff);
+
+ WREG32(mmGMCON_MISC3, save->gmcon_misc3);
+ WREG32(mmGMCON_MISC, save->gmcon_misc);
+ WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
+}
+
+/**
+ * cik_asic_pci_config_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use PCI Config method to reset the GPU.
+ *
+ * Returns 0 for success.
+ */
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
+{
+ struct kv_reset_save_regs kv_save = { 0 };
+ u32 i;
+ int r = -EINVAL;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ if (adev->flags & AMD_IS_APU)
+ kv_save_regs_for_reset(adev, &kv_save);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ /* reset */
+ amdgpu_device_pci_config_reset(adev);
+
+ udelay(100);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+ /* enable BM */
+ pci_set_master(adev->pdev);
+ adev->has_hw_reset = true;
+ r = 0;
+ break;
+ }
+ udelay(1);
+ }
+
+ /* does asic init need to be run first??? */
+ if (adev->flags & AMD_IS_APU)
+ kv_restore_regs_for_reset(adev, &kv_save);
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return r;
+}
+
+static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
+static enum amd_reset_method
+cik_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset;
+
+ if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ baco_reset = cik_asic_supports_baco(adev);
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "BACO reset\n");
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ dev_info(adev->dev, "PCI CONFIG reset\n");
+ r = cik_asic_pci_config_reset(adev);
+ }
+
+ return r;
+}
+
+static u32 cik_get_config_memsize(struct amdgpu_device *adev)
+{
+ return RREG32(mmCONFIG_MEMSIZE);
+}
+
+static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
+ u32 cntl_reg, u32 status_reg)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ uint32_t tmp;
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ clock, false, &dividers);
+ if (r)
+ return r;
+
+ tmp = RREG32_SMC(cntl_reg);
+ tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+ CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+ tmp |= dividers.post_divider;
+ WREG32_SMC(cntl_reg, tmp);
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int cik_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ int r = 0;
+
+ r = cik_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = cik_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ return r;
+}
+
+static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ u32 tmp;
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ ecclk, false, &dividers);
+ if (r)
+ return r;
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ tmp = RREG32_SMC(ixCG_ECLK_CNTL);
+ tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
+ CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+ tmp |= dividers.post_divider;
+ WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
+{
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 speed_cntl, current_data_rate;
+ int i;
+ u16 tmp16;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ if (amdgpu_pcie_gen2 == 0)
+ return;
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
+ return;
+
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ if (current_data_rate == 2) {
+ DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
+ if (current_data_rate == 1) {
+ DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ }
+
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
+ return;
+
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ /* re-try equalization if gen3 is not already enabled */
+ if (current_data_rate != 2) {
+ u16 bridge_cfg, gpu_cfg;
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
+ PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK)
+ >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
+
+ if (current_lw < max_lw) {
+ tmp = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw <<
+ PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
+ }
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* check status */
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
+ if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+ break;
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+
+ msleep(100);
+
+ /* linkctl */
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+
+ /* linkctl2 */
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+ }
+ }
+ }
+
+ /* set the link speed */
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK |
+ PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
+ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
+ else
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void cik_program_aspm(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ /* XXX double check APUs */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
+ PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK |
+ PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (!disable_l0s)
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
+
+ if (!disable_l1) {
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+
+ if (!disable_plloff_in_l1) {
+ bool clk_req_support;
+
+ orig = data = RREG32_PCIE(ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK |
+ PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) |
+ (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB0_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE(ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK |
+ PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) |
+ (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB0_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE(ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK |
+ PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) |
+ (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB1_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE(ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK |
+ PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) |
+ (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB1_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= ~(3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, data);
+
+ if (!disable_clkreq) {
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 lnkcap;
+
+ clk_req_support = false;
+ pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+ if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+ clk_req_support = true;
+ } else {
+ clk_req_support = false;
+ }
+
+ if (clk_req_support) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL2, data);
+
+ orig = data = RREG32_SMC(ixTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK |
+ THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) |
+ (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixTHM_CLK_CNTL, data);
+
+ orig = data = RREG32_SMC(ixMISC_CLK_CTRL);
+ data &= ~(MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK |
+ MISC_CLK_CTRL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT) |
+ (1 << MISC_CLK_CTRL__ZCLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMISC_CLK_CTRL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL_2, data);
+
+ orig = data = RREG32_SMC(ixMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= (4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMPLL_BYPASSCLK_SEL, data);
+ }
+ }
+ } else {
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+ }
+
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+
+ if (!disable_l0s) {
+ data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ if ((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) ==
+ PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) &&
+ (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+ }
+ }
+ }
+}
+
+static uint32_t cik_get_rev_id(struct amdgpu_device *adev)
+{
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
+}
+
+static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void cik_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
+static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
+static bool cik_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 clock_cntl, pc;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* check if the SMC is already running */
+ clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
+ pc = RREG32_SMC(ixSMC_PC_C);
+ if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) &&
+ (0x20100 <= pc))
+ return true;
+
+ return false;
+}
+
+static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
+static void cik_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static const struct amdgpu_asic_funcs cik_asic_funcs =
+{
+ .read_disabled_bios = &cik_read_disabled_bios,
+ .read_bios_from_rom = &cik_read_bios_from_rom,
+ .read_register = &cik_read_register,
+ .reset = &cik_asic_reset,
+ .reset_method = &cik_asic_reset_method,
+ .set_vga_state = &cik_vga_set_state,
+ .get_xclk = &cik_get_xclk,
+ .set_uvd_clocks = &cik_set_uvd_clocks,
+ .set_vce_clocks = &cik_set_vce_clocks,
+ .get_config_memsize = &cik_get_config_memsize,
+ .flush_hdp = &cik_flush_hdp,
+ .invalidate_hdp = &cik_invalidate_hdp,
+ .need_full_reset = &cik_need_full_reset,
+ .init_doorbell_index = &legacy_doorbell_index_init,
+ .get_pcie_usage = &cik_get_pcie_usage,
+ .need_reset_on_init = &cik_need_reset_on_init,
+ .get_pcie_replay_count = &cik_get_pcie_replay_count,
+ .supports_baco = &cik_asic_supports_baco,
+ .pre_asic_init = &cik_pre_asic_init,
+ .query_video_codecs = &cik_query_video_codecs,
+};
+
+static int cik_common_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->smc_rreg = &cik_smc_rreg;
+ adev->smc_wreg = &cik_smc_wreg;
+ adev->pcie_rreg = &cik_pcie_rreg;
+ adev->pcie_wreg = &cik_pcie_wreg;
+ adev->uvd_ctx_rreg = &cik_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = &cik_uvd_ctx_wreg;
+ adev->didt_rreg = &cik_didt_rreg;
+ adev->didt_wreg = &cik_didt_wreg;
+
+ adev->asic_funcs = &cik_asic_funcs;
+
+ adev->rev_id = cik_get_rev_id(adev);
+ adev->external_rev_id = 0xFF;
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
+ case CHIP_HAWAII:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 0x28;
+ break;
+ case CHIP_KAVERI:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags =
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |*/
+ AMD_PG_SUPPORT_UVD |
+ AMD_PG_SUPPORT_VCE |
+ /* AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_ACP |
+ AMD_PG_SUPPORT_SAMU |*/
+ 0;
+ if (adev->pdev->device == 0x1312 ||
+ adev->pdev->device == 0x1316 ||
+ adev->pdev->device == 0x1317)
+ adev->external_rev_id = 0x41;
+ else
+ adev->external_rev_id = 0x1;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags =
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG | */
+ AMD_PG_SUPPORT_UVD |
+ /*AMD_PG_SUPPORT_VCE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_SAMU |*/
+ 0;
+ if (adev->asic_type == CHIP_KABINI) {
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x81;
+ else if (adev->rev_id == 1)
+ adev->external_rev_id = 0x82;
+ else if (adev->rev_id == 2)
+ adev->external_rev_id = 0x85;
+ } else
+ adev->external_rev_id = adev->rev_id + 0xa1;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cik_common_sw_init(void *handle)
+{
+ return 0;
+}
+
+static int cik_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int cik_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* move the golden regs per IP block */
+ cik_init_golden_registers(adev);
+ /* enable pcie gen2/3 link */
+ cik_pcie_gen3_enable(adev);
+ /* enable aspm */
+ cik_program_aspm(adev);
+
+ return 0;
+}
+
+static int cik_common_hw_fini(void *handle)
+{
+ return 0;
+}
+
+static int cik_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_common_hw_fini(adev);
+}
+
+static int cik_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_common_hw_init(adev);
+}
+
+static bool cik_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int cik_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int cik_common_soft_reset(void *handle)
+{
+ /* XXX hard reset?? */
+ return 0;
+}
+
+static int cik_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int cik_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_common_ip_funcs = {
+ .name = "cik_common",
+ .early_init = cik_common_early_init,
+ .late_init = NULL,
+ .sw_init = cik_common_sw_init,
+ .sw_fini = cik_common_sw_fini,
+ .hw_init = cik_common_hw_init,
+ .hw_fini = cik_common_hw_fini,
+ .suspend = cik_common_suspend,
+ .resume = cik_common_resume,
+ .is_idle = cik_common_is_idle,
+ .wait_for_idle = cik_common_wait_for_idle,
+ .soft_reset = cik_common_soft_reset,
+ .set_clockgating_state = cik_common_set_clockgating_state,
+ .set_powergating_state = cik_common_set_powergating_state,
+};
+
+static const struct amdgpu_ip_block_version cik_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_common_ip_funcs,
+};
+
+int cik_set_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_HAWAII:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_KAVERI:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
+
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
new file mode 100644
index 000000000..f91ab4c24
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_H__
+#define __CIK_H__
+
+#define CIK_FLUSH_GPU_TLB_NUM_WREG 3
+
+void cik_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+int cik_set_ip_blocks(struct amdgpu_device *adev);
+
+void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
new file mode 100644
index 000000000..6f7c031dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "cikd.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
+
+static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * cik_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (CIK).
+ */
+static void cik_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_cntl |= IH_CNTL__ENABLE_INTR_MASK;
+ ih_rb_cntl |= IH_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmIH_CNTL, ih_cntl);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * cik_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (CIK).
+ */
+static void cik_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+
+ ih_rb_cntl &= ~IH_RB_CNTL__RB_ENABLE_MASK;
+ ih_cntl &= ~IH_CNTL__ENABLE_INTR_MASK;
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ WREG32(mmIH_CNTL, ih_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * cik_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (CIK).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int cik_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int rb_bufsz;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+
+ /* disable irqs */
+ cik_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl &= ~INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK;
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl &= ~INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK;
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+
+ ih_rb_cntl = (IH_RB_CNTL__WPTR_OVERFLOW_ENABLE_MASK |
+ IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK |
+ (rb_bufsz << 1));
+
+ ih_rb_cntl |= IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK;
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ /* Default settings for IH_CNTL (disabled at first) */
+ ih_cntl = (0x10 << IH_CNTL__MC_WRREQ_CREDIT__SHIFT) |
+ (0x10 << IH_CNTL__MC_WR_CLEAN_CNT__SHIFT) |
+ (0 << IH_CNTL__MC_VMID__SHIFT);
+ /* IH_CNTL__RPTR_REARM_MASK only works if msi's are enabled */
+ if (adev->irq.msi_enabled)
+ ih_cntl |= IH_CNTL__RPTR_REARM_MASK;
+ WREG32(mmIH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+
+ /* enable irqs */
+ cik_ih_enable_interrupts(adev);
+
+ return 0;
+}
+
+/**
+ * cik_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (CIK).
+ */
+static void cik_ih_irq_disable(struct amdgpu_device *adev)
+{
+ cik_ih_disable_interrupts(adev);
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * cik_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (CIK). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by cik_irq_process().
+ * Returns the value of the wptr.
+ */
+static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
+ wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 16). Hopefully
+ * this should allow us to catchup.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
+ }
+ return (wptr & ih->ptr_mask);
+}
+
+/* CIK IV Ring
+ * Each IV ring entry is 128 bits:
+ * [7:0] - interrupt source id
+ * [31:8] - reserved
+ * [59:32] - interrupt source data
+ * [63:60] - reserved
+ * [71:64] - RINGID
+ * CP:
+ * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
+ * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
+ * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
+ * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
+ * PIPE_ID - ME0 0=3D
+ * - ME1&2 compute dispatcher (4 pipes each)
+ * SDMA:
+ * INSTANCE_ID [1:0], QUEUE_ID[1:0]
+ * INSTANCE_ID - 0 = sdma0, 1 = sdma1
+ * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
+ * [79:72] - VMID
+ * [95:80] - PASID
+ * [127:96] - reserved
+ */
+
+ /**
+ * cik_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position.
+ */
+static void cik_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * cik_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set wptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void cik_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+}
+
+static int cik_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ cik_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int cik_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int cik_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int cik_ih_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_ih_irq_init(adev);
+}
+
+static int cik_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cik_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int cik_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_ih_hw_fini(adev);
+}
+
+static int cik_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_ih_hw_init(adev);
+}
+
+static bool cik_ih_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ return false;
+
+ return true;
+}
+
+static int cik_ih_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS) & SRBM_STATUS__IH_BUSY_MASK;
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cik_ih_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cik_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int cik_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_ih_ip_funcs = {
+ .name = "cik_ih",
+ .early_init = cik_ih_early_init,
+ .late_init = NULL,
+ .sw_init = cik_ih_sw_init,
+ .sw_fini = cik_ih_sw_fini,
+ .hw_init = cik_ih_hw_init,
+ .hw_fini = cik_ih_hw_fini,
+ .suspend = cik_ih_suspend,
+ .resume = cik_ih_resume,
+ .is_idle = cik_ih_is_idle,
+ .wait_for_idle = cik_ih_wait_for_idle,
+ .soft_reset = cik_ih_soft_reset,
+ .set_clockgating_state = cik_ih_set_clockgating_state,
+ .set_powergating_state = cik_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs cik_ih_funcs = {
+ .get_wptr = cik_ih_get_wptr,
+ .decode_iv = cik_ih_decode_iv,
+ .set_rptr = cik_ih_set_rptr
+};
+
+static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &cik_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version cik_ih_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.h b/drivers/gpu/drm/amd/amdgpu/cik_ih.h
new file mode 100644
index 000000000..1d9ddee28
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_IH_H__
+#define __CIK_IH_H__
+
+extern const struct amdgpu_ip_block_version cik_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
new file mode 100644
index 000000000..52598fbc9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -0,0 +1,1385 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "cikd.h"
+#include "cik.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+{
+ SDMA0_REGISTER_OFFSET,
+ SDMA1_REGISTER_OFFSET
+};
+
+static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static int cik_sdma_soft_reset(void *handle);
+
+MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
+
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+}
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines. These engines are used for compute
+ * and gfx. There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has it's own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things. It also has support for tiling/detiling of
+ * buffers.
+ */
+
+/**
+ * cik_sdma_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int cik_sdma_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0, i;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ chip_name = "bonaire";
+ break;
+ case CHIP_HAWAII:
+ chip_name = "hawaii";
+ break;
+ case CHIP_KAVERI:
+ chip_name = "kaveri";
+ break;
+ case CHIP_KABINI:
+ chip_name = "kabini";
+ break;
+ case CHIP_MULLINS:
+ chip_name = "mullins";
+ break;
+ default: BUG();
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ if (err)
+ goto out;
+ }
+out:
+ if (err) {
+ pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name);
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ }
+ return err;
+}
+
+/**
+ * cik_sdma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (CIK+).
+ */
+static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u32 rptr;
+
+ rptr = *ring->rptr_cpu_addr;
+
+ return (rptr & 0x3fffc) >> 2;
+}
+
+/**
+ * cik_sdma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (CIK+).
+ */
+static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2;
+}
+
+/**
+ * cik_sdma_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (CIK+).
+ */
+static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
+ (ring->wptr << 2) & 0x3fffc);
+}
+
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_NOP_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrive vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (CIK).
+ */
+static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 extra_bits = vmid & 0xf;
+
+ /* IB packet must end on a 8 DW boundary */
+ cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+ amdgpu_ring_write(ring, ib->length_dw);
+
+}
+
+/**
+ * cik_sdma_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+ u32 ref_and_mask;
+
+ if (ring->me == 0)
+ ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
+ else
+ ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
+}
+
+/**
+ * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (CIK).
+ */
+static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
+}
+
+/**
+ * cik_sdma_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (CIK).
+ */
+static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
+ }
+}
+
+/**
+ * cik_sdma_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (CIK).
+ */
+static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * cik_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (VI).
+ */
+static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+ if (enable) {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 1);
+ if (amdgpu_sdma_phase_quantum) {
+ WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ }
+ } else {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 0);
+ }
+
+ WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+ }
+}
+
+/**
+ * cik_sdma_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (CIK).
+ */
+static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 me_cntl;
+ int i;
+
+ if (!enable) {
+ cik_sdma_gfx_stop(adev);
+ cik_sdma_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ me_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
+ if (enable)
+ me_cntl &= ~SDMA0_F32_CNTL__HALT_MASK;
+ else
+ me_cntl |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], me_cntl);
+ }
+}
+
+/**
+ * cik_sdma_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ int i, j, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (j = 0; j < 16; j++) {
+ cik_srbm_select(adev, 0, 0, 0, j);
+ /* SDMA GFX */
+ WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
+ /* XXX SDMA RLC - todo */
+ }
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
+ WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= SDMA0_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK |
+ SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
+#endif
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
+ ((ring->rptr_gpu_addr) & 0xFFFFFFFC));
+
+ rb_cntl |= SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
+
+ WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+
+ /* enable DMA RB */
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
+ rb_cntl | SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK);
+
+ ib_cntl = SDMA0_GFX_IB_CNTL__IB_ENABLE_MASK;
+#ifdef __BIG_ENDIAN
+ ib_cntl |= SDMA0_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
+#endif
+ /* enable DMA IBs */
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+
+ cik_sdma_enable(adev, true);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * cik_sdma_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_rlc_resume(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+ return 0;
+}
+
+/**
+ * cik_sdma_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int cik_sdma_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ cik_sdma_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma.instance[i].feature_version >= 20)
+ adev->sdma.instance[i].burst_nop = true;
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
+ for (j = 0; j < fw_size; j++)
+ WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
+ WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * cik_sdma_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_start(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = cik_sdma_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* halt the engine before programing */
+ cik_sdma_enable(adev, false);
+ /* enable sdma ring preemption */
+ cik_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = cik_sdma_gfx_resume(adev);
+ if (r)
+ return r;
+ r = cik_sdma_rlc_resume(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * cik_sdma_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, 1); /* number of DWs to follow */
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * cik_sdma_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (CIK).
+ * Returns 0 on success, error on failure.
+ */
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = 1;
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * cik_sdma_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+}
+
+/**
+ * cik_sdma_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (CIK).
+ */
+static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * cik_sdma_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
+}
+
+/**
+ * cik_sdma_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+ SDMA_NOP_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+}
+
+/**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+ SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+ SDMA_POLL_REG_MEM_EXTRA_M));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
+/**
+ * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (CIK).
+ */
+static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* reference */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
+}
+
+static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
+ WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
+ } else {
+ orig = data = RREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ } else {
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static int cik_sdma_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
+ cik_sdma_set_ring_funcs(adev);
+ cik_sdma_set_irq_funcs(adev);
+ cik_sdma_set_buffer_funcs(adev);
+ cik_sdma_set_vm_pte_funcs(adev);
+
+ return 0;
+}
+
+static int cik_sdma_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r, i;
+
+ r = cik_sdma_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int cik_sdma_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ cik_sdma_free_microcode(adev);
+ return 0;
+}
+
+static int cik_sdma_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = cik_sdma_start(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int cik_sdma_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cik_ctx_switch_enable(adev, false);
+ cik_sdma_enable(adev, false);
+
+ return 0;
+}
+
+static int cik_sdma_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cik_sdma_hw_fini(adev);
+}
+
+static int cik_sdma_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cik_sdma_soft_reset(handle);
+
+ return cik_sdma_hw_init(adev);
+}
+
+static bool cik_sdma_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int cik_sdma_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK);
+
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cik_sdma_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp;
+
+ /* sdma0 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+
+ /* sdma1 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ switch (type) {
+ case AMDGPU_SDMA_IRQ_INSTANCE0:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl |= SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AMDGPU_SDMA_IRQ_INSTANCE1:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl |= SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int cik_sdma_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (instance_id) {
+ case 0:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ case 1:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
+ return 0;
+}
+
+static int cik_sdma_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ cik_enable_sdma_mgcg(adev, gate);
+ cik_enable_sdma_mgls(adev, gate);
+
+ return 0;
+}
+
+static int cik_sdma_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_sdma_ip_funcs = {
+ .name = "cik_sdma",
+ .early_init = cik_sdma_early_init,
+ .late_init = NULL,
+ .sw_init = cik_sdma_sw_init,
+ .sw_fini = cik_sdma_sw_fini,
+ .hw_init = cik_sdma_hw_init,
+ .hw_fini = cik_sdma_hw_fini,
+ .suspend = cik_sdma_suspend,
+ .resume = cik_sdma_resume,
+ .is_idle = cik_sdma_is_idle,
+ .wait_for_idle = cik_sdma_wait_for_idle,
+ .soft_reset = cik_sdma_soft_reset,
+ .set_clockgating_state = cik_sdma_set_clockgating_state,
+ .set_powergating_state = cik_sdma_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0),
+ .support_64bit_ptrs = false,
+ .get_rptr = cik_sdma_ring_get_rptr,
+ .get_wptr = cik_sdma_ring_get_wptr,
+ .set_wptr = cik_sdma_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* cik_sdma_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* cik_sdma_ring_emit_pipeline_sync */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
+ 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
+ .emit_ib = cik_sdma_ring_emit_ib,
+ .emit_fence = cik_sdma_ring_emit_fence,
+ .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
+ .emit_vm_flush = cik_sdma_ring_emit_vm_flush,
+ .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+ .test_ring = cik_sdma_ring_test_ring,
+ .test_ib = cik_sdma_ring_test_ib,
+ .insert_nop = cik_sdma_ring_insert_nop,
+ .pad_ib = cik_sdma_ring_pad_ib,
+ .emit_wreg = cik_sdma_ring_emit_wreg,
+};
+
+static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = {
+ .set = cik_sdma_set_trap_irq_state,
+ .process = cik_sdma_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs cik_sdma_illegal_inst_irq_funcs = {
+ .process = cik_sdma_process_illegal_inst_irq,
+};
+
+static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &cik_sdma_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &cik_sdma_illegal_inst_irq_funcs;
+}
+
+/**
+ * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: is this a secure operation
+ *
+ * Copy GPU buffers using the DMA engine (CIK).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (CIK).
+ */
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
+}
+
+static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
+ .copy_max_bytes = 0x1fffff,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = cik_sdma_emit_copy_buffer,
+
+ .fill_max_bytes = 0x1fffff,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = cik_sdma_emit_fill_buffer,
+};
+
+static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = cik_sdma_vm_copy_pte,
+
+ .write_pte = cik_sdma_vm_write_pte,
+ .set_pte_pde = cik_sdma_vm_set_pte_pde,
+};
+
+static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version cik_sdma_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_sdma_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.h b/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
new file mode 100644
index 000000000..a4a8fe014
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_SDMA_H__
+#define __CIK_SDMA_H__
+
+extern const struct amdgpu_ip_block_version cik_sdma_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
new file mode 100644
index 000000000..55982c006
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -0,0 +1,607 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef CIK_H
+#define CIK_H
+
+#define MC_SEQ_MISC0__MT__MASK 0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
+#define MC_SEQ_MISC0__MT__DDR2 0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3 0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4 0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5 0x50000000
+#define MC_SEQ_MISC0__MT__HBM 0x60000000
+#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
+
+#define CP_ME_TABLE_SIZE 96
+
+/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c)
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c)
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c)
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c)
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c)
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c)
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
+#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+
+#define PIPEID(x) ((x) << 0)
+#define MEID(x) ((x) << 2)
+#define VMID(x) ((x) << 4)
+#define QUEUEID(x) ((x) << 8)
+
+#define mmCC_DRM_ID_STRAPS 0x1559
+#define CC_DRM_ID_STRAPS__ATI_REV_ID_MASK 0xf0000000
+
+#define mmCHUB_CONTROL 0x619
+#define BYPASS_VM (1 << 0)
+
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5)
+
+#define mmGRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
+#define LUT_10BIT_BYPASS_EN (1 << 8)
+
+# define CURSOR_MONO 0
+# define CURSOR_24_1 1
+# define CURSOR_24_8_PRE_MULT 2
+# define CURSOR_24_8_UNPRE_MULT 3
+# define CURSOR_URGENT_ALWAYS 0
+# define CURSOR_URGENT_1_8 1
+# define CURSOR_URGENT_1_4 2
+# define CURSOR_URGENT_3_8 3
+# define CURSOR_URGENT_1_2 4
+
+# define GRPH_DEPTH_8BPP 0
+# define GRPH_DEPTH_16BPP 1
+# define GRPH_DEPTH_32BPP 2
+/* 8 BPP */
+# define GRPH_FORMAT_INDEXED 0
+/* 16 BPP */
+# define GRPH_FORMAT_ARGB1555 0
+# define GRPH_FORMAT_ARGB565 1
+# define GRPH_FORMAT_ARGB4444 2
+# define GRPH_FORMAT_AI88 3
+# define GRPH_FORMAT_MONO16 4
+# define GRPH_FORMAT_BGRA5551 5
+/* 32 BPP */
+# define GRPH_FORMAT_ARGB8888 0
+# define GRPH_FORMAT_ARGB2101010 1
+# define GRPH_FORMAT_32BPP_DIG 2
+# define GRPH_FORMAT_8B_ARGB2101010 3
+# define GRPH_FORMAT_BGRA1010102 4
+# define GRPH_FORMAT_8B_BGRA1010102 5
+# define GRPH_FORMAT_RGB111110 6
+# define GRPH_FORMAT_BGR101111 7
+# define ADDR_SURF_MACRO_TILE_ASPECT_1 0
+# define ADDR_SURF_MACRO_TILE_ASPECT_2 1
+# define ADDR_SURF_MACRO_TILE_ASPECT_4 2
+# define ADDR_SURF_MACRO_TILE_ASPECT_8 3
+# define GRPH_ARRAY_LINEAR_GENERAL 0
+# define GRPH_ARRAY_LINEAR_ALIGNED 1
+# define GRPH_ARRAY_1D_TILED_THIN1 2
+# define GRPH_ARRAY_2D_TILED_THIN1 4
+# define DISPLAY_MICRO_TILING 0
+# define THIN_MICRO_TILING 1
+# define DEPTH_MICRO_TILING 2
+# define ROTATED_MICRO_TILING 4
+# define GRPH_ENDIAN_NONE 0
+# define GRPH_ENDIAN_8IN16 1
+# define GRPH_ENDIAN_8IN32 2
+# define GRPH_ENDIAN_8IN64 3
+# define GRPH_RED_SEL_R 0
+# define GRPH_RED_SEL_G 1
+# define GRPH_RED_SEL_B 2
+# define GRPH_RED_SEL_A 3
+# define GRPH_GREEN_SEL_G 0
+# define GRPH_GREEN_SEL_B 1
+# define GRPH_GREEN_SEL_A 2
+# define GRPH_GREEN_SEL_R 3
+# define GRPH_BLUE_SEL_B 0
+# define GRPH_BLUE_SEL_A 1
+# define GRPH_BLUE_SEL_R 2
+# define GRPH_BLUE_SEL_G 3
+# define GRPH_ALPHA_SEL_A 0
+# define GRPH_ALPHA_SEL_R 1
+# define GRPH_ALPHA_SEL_G 2
+# define GRPH_ALPHA_SEL_B 3
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
+
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+# define FMT_CLAMP_6BPC 0
+# define FMT_CLAMP_8BPC 1
+# define FMT_CLAMP_10BPC 2
+
+# define HDMI_24BIT_DEEP_COLOR 0
+# define HDMI_30BIT_DEEP_COLOR 1
+# define HDMI_36BIT_DEEP_COLOR 2
+# define HDMI_ACR_HW 0
+# define HDMI_ACR_32 1
+# define HDMI_ACR_44 2
+# define HDMI_ACR_48 3
+# define HDMI_ACR_X1 1
+# define HDMI_ACR_X2 2
+# define HDMI_ACR_X4 4
+# define AFMT_AVI_INFO_Y_RGB 0
+# define AFMT_AVI_INFO_Y_YCBCR422 1
+# define AFMT_AVI_INFO_Y_YCBCR444 2
+
+#define NO_AUTO 0
+#define ES_AUTO 1
+#define GS_AUTO 2
+#define ES_AND_GS_AUTO 3
+
+# define ARRAY_MODE(x) ((x) << 2)
+# define PIPE_CONFIG(x) ((x) << 6)
+# define TILE_SPLIT(x) ((x) << 11)
+# define MICRO_TILE_MODE_NEW(x) ((x) << 22)
+# define SAMPLE_SPLIT(x) ((x) << 25)
+# define BANK_WIDTH(x) ((x) << 0)
+# define BANK_HEIGHT(x) ((x) << 2)
+# define MACRO_TILE_ASPECT(x) ((x) << 4)
+# define NUM_BANKS(x) ((x) << 6)
+
+#define MSG_ENTER_RLC_SAFE_MODE 1
+#define MSG_EXIT_RLC_SAFE_MODE 0
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_CLIENT_CODE ((x) << 24) /* 0 = CP, 1 = CB, 2 = DB */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_TCL2_VOLATILE (1 << 22)
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_DEST_BASE_0_ENA (1 << 0)
+# define PACKET3_DEST_BASE_1_ENA (1 << 1)
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
+# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
+# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
+# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
+# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
+# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
+# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
+# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
+# define PACKET3_TCL1_VOL_ACTION_ENA (1 << 15)
+# define PACKET3_TC_VOL_ACTION_ENA (1 << 16) /* L2 */
+# define PACKET3_TC_WB_ACTION_ENA (1 << 18) /* L2 */
+# define PACKET3_DEST_BASE_2_ENA (1 << 19)
+# define PACKET3_DEST_BASE_3_ENA (1 << 21)
+# define PACKET3_TCL1_ACTION_ENA (1 << 22)
+# define PACKET3_TC_ACTION_ENA (1 << 23) /* L2 */
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+# define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28)
+# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - EOP events
+ * 6 - EOS events
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
+#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
+#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
+#define EOP_TCL1_ACTION_EN (1 << 16)
+#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TCL2_VOLATILE (1 << 24)
+#define EOP_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_DIS_WC (1 << 21)
+# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_ACQUIRE_MEM 0x58
+#define PACKET3_REWIND 0x59
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+
+/* SDMA - first instance at 0xd000, second at 0xd800 */
+#define SDMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
+#define SDMA_MAX_INSTANCE 2
+
+#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
+ (((sub_op) & 0xFF) << 8) | \
+ (((op) & 0xFF) << 0))
+/* sDMA opcodes */
+#define SDMA_OPCODE_NOP 0
+# define SDMA_NOP_COUNT(x) (((x) & 0x3FFF) << 16)
+#define SDMA_OPCODE_COPY 1
+# define SDMA_COPY_SUB_OPCODE_LINEAR 0
+# define SDMA_COPY_SUB_OPCODE_TILED 1
+# define SDMA_COPY_SUB_OPCODE_SOA 3
+# define SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 4
+# define SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 5
+# define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 6
+#define SDMA_OPCODE_WRITE 2
+# define SDMA_WRITE_SUB_OPCODE_LINEAR 0
+# define SDMA_WRITE_SUB_OPCODE_TILED 1
+#define SDMA_OPCODE_INDIRECT_BUFFER 4
+#define SDMA_OPCODE_FENCE 5
+#define SDMA_OPCODE_TRAP 6
+#define SDMA_OPCODE_SEMAPHORE 7
+# define SDMA_SEMAPHORE_EXTRA_O (1 << 13)
+ /* 0 - increment
+ * 1 - write 1
+ */
+# define SDMA_SEMAPHORE_EXTRA_S (1 << 14)
+ /* 0 - wait
+ * 1 - signal
+ */
+# define SDMA_SEMAPHORE_EXTRA_M (1 << 15)
+ /* mailbox */
+#define SDMA_OPCODE_POLL_REG_MEM 8
+# define SDMA_POLL_REG_MEM_EXTRA_OP(x) ((x) << 10)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+# define SDMA_POLL_REG_MEM_EXTRA_FUNC(x) ((x) << 12)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+# define SDMA_POLL_REG_MEM_EXTRA_M (1 << 15)
+ /* 0 = register
+ * 1 = memory
+ */
+#define SDMA_OPCODE_COND_EXEC 9
+#define SDMA_OPCODE_CONSTANT_FILL 11
+# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14)
+ /* 0 = byte fill
+ * 2 = DW fill
+ */
+#define SDMA_OPCODE_GENERATE_PTE_PDE 12
+#define SDMA_OPCODE_TIMESTAMP 13
+# define SDMA_TIMESTAMP_SUB_OPCODE_SET_LOCAL 0
+# define SDMA_TIMESTAMP_SUB_OPCODE_GET_LOCAL 1
+# define SDMA_TIMESTAMP_SUB_OPCODE_GET_GLOBAL 2
+#define SDMA_OPCODE_SRBM_WRITE 14
+# define SDMA_SRBM_WRITE_EXTRA_BYTE_ENABLE(x) ((x) << 12)
+ /* byte mask */
+
+#define VCE_CMD_NO_OP 0x00000000
+#define VCE_CMD_END 0x00000001
+#define VCE_CMD_IB 0x00000002
+#define VCE_CMD_FENCE 0x00000003
+#define VCE_CMD_TRAP 0x00000004
+#define VCE_CMD_IB_AUTO 0x00000005
+#define VCE_CMD_SEMAPHORE 0x00000006
+
+/* if PTR32, these are the bases for scratch and lds */
+#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
+#define SHARED_BASE(x) ((x) << 16) /* LDS */
+
+#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
+
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+enum {
+ MTYPE_CACHED = 0,
+ MTYPE_NONCACHED = 3
+};
+
+/* mmPA_SC_RASTER_CONFIG mask */
+#define RB_MAP_PKR0(x) ((x) << 0)
+#define RB_MAP_PKR0_MASK (0x3 << 0)
+#define RB_MAP_PKR1(x) ((x) << 2)
+#define RB_MAP_PKR1_MASK (0x3 << 2)
+#define RB_XSEL2(x) ((x) << 4)
+#define RB_XSEL2_MASK (0x3 << 4)
+#define RB_XSEL (1 << 6)
+#define RB_YSEL (1 << 7)
+#define PKR_MAP(x) ((x) << 8)
+#define PKR_MAP_MASK (0x3 << 8)
+#define PKR_XSEL(x) ((x) << 10)
+#define PKR_XSEL_MASK (0x3 << 10)
+#define PKR_YSEL(x) ((x) << 12)
+#define PKR_YSEL_MASK (0x3 << 12)
+#define SC_MAP(x) ((x) << 16)
+#define SC_MAP_MASK (0x3 << 16)
+#define SC_XSEL(x) ((x) << 18)
+#define SC_XSEL_MASK (0x3 << 18)
+#define SC_YSEL(x) ((x) << 20)
+#define SC_YSEL_MASK (0x3 << 20)
+#define SE_MAP(x) ((x) << 24)
+#define SE_MAP_MASK (0x3 << 24)
+#define SE_XSEL(x) ((x) << 26)
+#define SE_XSEL_MASK (0x3 << 26)
+#define SE_YSEL(x) ((x) << 28)
+#define SE_YSEL_MASK (0x3 << 28)
+
+/* mmPA_SC_RASTER_CONFIG_1 mask */
+#define SE_PAIR_MAP(x) ((x) << 0)
+#define SE_PAIR_MAP_MASK (0x3 << 0)
+#define SE_PAIR_XSEL(x) ((x) << 2)
+#define SE_PAIR_XSEL_MASK (0x3 << 2)
+#define SE_PAIR_YSEL(x) ((x) << 4)
+#define SE_PAIR_YSEL_MASK (0x3 << 4)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h b/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h
new file mode 100644
index 000000000..c3982f947
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int ci_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int ci_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0, // HOLE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int ci_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const unsigned int ci_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int ci_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int ci_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int ci_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000010, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def ci_SECT_CONTEXT_defs[] =
+{
+ {ci_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {ci_SECT_CONTEXT_def_2, 0x0000a0d6, 274 },
+ {ci_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {ci_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def ci_cs_data[] = {
+ { ci_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h b/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h
new file mode 100644
index 000000000..3eda707d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef CLEARSTATE_DEFS_H
+#define CLEARSTATE_DEFS_H
+
+enum section_id {
+ SECT_NONE,
+ SECT_CONTEXT,
+ SECT_CLEAR,
+ SECT_CTRLCONST
+};
+
+struct cs_extent_def {
+ const unsigned int *extent;
+ const unsigned int reg_index;
+ const unsigned int reg_count;
+};
+
+struct cs_section_def {
+ const struct cs_extent_def *section;
+ const enum section_id id;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
new file mode 100644
index 000000000..27a2ff0a0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
@@ -0,0 +1,975 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int gfx10_SECT_CONTEXT_def_1[] = {
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_DFSM_CONTROL
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_2[] = {
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0x00000000, // HOLE
+ 0x00000000, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_3[] = {
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_4[] = {
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_5[] = {
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_6[] = {
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_7[] = {
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0x00000000, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_8[] = {
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx10_SECT_CONTEXT_defs[] = {
+ {gfx10_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx10_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx10_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx10_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx10_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx10_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx10_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx10_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx10_cs_data[] = {
+ { gfx10_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
new file mode 100644
index 000000000..a8b29d33c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
@@ -0,0 +1,997 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX11_H_
+#define __CLEARSTATE_GFX11_H_
+
+static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_RESERVED_REG_2
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_RESERVED_REG_1
+ 0x00000000, // DB_RESERVED_REG_3
+ 0x00000000, // DB_SPI_VRS_CENTER_LOCATION
+ 0, // HOLE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_PIPEID
+ 0x00000000, // CP_VMID
+ 0x00000000, // CONTEXT_RESERVED_REG0
+ 0x00000000, // CONTEXT_RESERVED_REG1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_FSR_EN
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
+ 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_RATE_BASE
+ 0x00000000, // PA_SC_VRS_RATE_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_SIZE_XY
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_FDCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0x00000000, // PA_RATE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0x00000000, // SPI_BARYC_SSAA_CNTL
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_LO
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT_CONTROL
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x00000000, // PA_STEREO_CNTL
+ 0x00000000, // PA_STATE_STEREO_X
+ 0x00000000, // PA_CL_VRS_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0, // HOLE
+ 0x00000000, // VGT_REUSE_OFF
+ 0, // HOLE
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // GE_NGG_SUBGRP_CNTL
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0x00000000, // PA_SC_BINNER_CNTL_2
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
+{
+ {gfx11_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx11_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx11_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx11_cs_data[] = {
+ { gfx11_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX11_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
new file mode 100644
index 000000000..567a90480
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -0,0 +1,942 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_DFSM_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+ 0x00000000, // CB_MRT0_EPITCH
+ 0x00000000, // CB_MRT1_EPITCH
+ 0x00000000, // CB_MRT2_EPITCH
+ 0x00000000, // CB_MRT3_EPITCH
+ 0x00000000, // CB_MRT4_EPITCH
+ 0x00000000, // CB_MRT5_EPITCH
+ 0x00000000, // CB_MRT6_EPITCH
+ 0x00000000, // CB_MRT7_EPITCH
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
+{
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00000000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+};
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
+{
+ {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
+ {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx9_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {gfx9_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx9_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx9_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx9_SECT_CONTEXT_def_8, 0x0000a2f5, 155 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx9_cs_data[] = {
+ { gfx9_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
new file mode 100644
index 000000000..66e39cdb5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
@@ -0,0 +1,941 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const u32 si_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const u32 si_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_WAVE_MGMT_1
+ 0x00000000, // SPI_WAVE_MGMT_2
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const u32 si_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const u32 si_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0, // HOLE
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const u32 si_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const u32 si_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const u32 si_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000010, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def si_SECT_CONTEXT_defs[] =
+{
+ {si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {si_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {si_SECT_CONTEXT_def_5, 0x0000a2a1, 1 },
+ {si_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {si_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { NULL, 0, 0 }
+};
+static const struct cs_section_def si_cs_data[] = {
+ { si_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { NULL, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h b/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h
new file mode 100644
index 000000000..1aab9bef9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int vi_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int vi_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0, // HOLE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int vi_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const unsigned int vi_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int vi_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int vi_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int vi_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def vi_SECT_CONTEXT_defs[] =
+{
+ {vi_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {vi_SECT_CONTEXT_def_2, 0x0000a0d6, 274 },
+ {vi_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {vi_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {vi_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {vi_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {vi_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def vi_cs_data[] = {
+ { vi_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
new file mode 100644
index 000000000..b8c47e0cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "vid.h"
+
+#include "oss/oss_3_0_1_d.h"
+#include "oss/oss_3_0_1_sh_mask.h"
+
+#include "bif/bif_5_1_d.h"
+#include "bif/bif_5_1_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
+
+static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * cz_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (VI).
+ */
+static void cz_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * cz_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (VI).
+ */
+static void cz_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 0);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ WREG32(mmIH_CNTL, ih_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * cz_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int cz_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+ int rb_bufsz;
+
+ /* disable irqs */
+ cz_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+ ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ /* Default settings for IH_CNTL (disabled at first) */
+ ih_cntl = RREG32(mmIH_CNTL);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, MC_VMID, 0);
+
+ if (adev->irq.msi_enabled)
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, RPTR_REARM, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ cz_ih_enable_interrupts(adev);
+
+ return 0;
+}
+
+/**
+ * cz_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VI).
+ */
+static void cz_ih_irq_disable(struct amdgpu_device *adev)
+{
+ cz_ih_disable_interrupts(adev);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * cz_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VI). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by cz_irq_process(VI).
+ * Returns the value of the wptr.
+ */
+static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32(mmIH_RB_WPTR);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 16). Hopefully
+ * this should allow us to catchup.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * cz_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to decode
+ * @entry: IV entry to place decoded information into
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position.
+ */
+static void cz_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * cz_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void cz_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+}
+
+static int cz_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ cz_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int cz_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int cz_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int cz_ih_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = cz_ih_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int cz_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cz_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int cz_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cz_ih_hw_fini(adev);
+}
+
+static int cz_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return cz_ih_hw_init(adev);
+}
+
+static bool cz_ih_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return false;
+
+ return true;
+}
+
+static int cz_ih_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (!REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cz_ih_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_IH, 1);
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cz_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int cz_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ // TODO
+ return 0;
+}
+
+static const struct amd_ip_funcs cz_ih_ip_funcs = {
+ .name = "cz_ih",
+ .early_init = cz_ih_early_init,
+ .late_init = NULL,
+ .sw_init = cz_ih_sw_init,
+ .sw_fini = cz_ih_sw_fini,
+ .hw_init = cz_ih_hw_init,
+ .hw_fini = cz_ih_hw_fini,
+ .suspend = cz_ih_suspend,
+ .resume = cz_ih_resume,
+ .is_idle = cz_ih_is_idle,
+ .wait_for_idle = cz_ih_wait_for_idle,
+ .soft_reset = cz_ih_soft_reset,
+ .set_clockgating_state = cz_ih_set_clockgating_state,
+ .set_powergating_state = cz_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs cz_ih_funcs = {
+ .get_wptr = cz_ih_get_wptr,
+ .decode_iv = cz_ih_decode_iv,
+ .set_rptr = cz_ih_set_rptr
+};
+
+static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &cz_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version cz_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cz_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.h b/drivers/gpu/drm/amd/amdgpu/cz_ih.h
new file mode 100644
index 000000000..14be77532
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CZ_IH_H__
+#define __CZ_IH_H__
+
+extern const struct amdgpu_ip_block_version cz_ih_ip_block;
+
+#endif /* __CZ_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
new file mode 100644
index 000000000..584cd5277
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -0,0 +1,3660 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "vid.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "dce_v10_0.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+#include "dce/dce_10_0_enum.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ CRTC6_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] = {
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ DIG0_REGISTER_OFFSET,
+ DIG1_REGISTER_OFFSET,
+ DIG2_REGISTER_OFFSET,
+ DIG3_REGISTER_OFFSET,
+ DIG4_REGISTER_OFFSET,
+ DIG5_REGISTER_OFFSET,
+ DIG6_REGISTER_OFFSET
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static const u32 golden_settings_tonga_a11[] = {
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+};
+
+static const u32 tonga_mgcg_cgcg_init[] = {
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+};
+
+static const u32 golden_settings_fiji_a10[] = {
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] = {
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+};
+
+static void dce_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_fiji_a10,
+ ARRAY_SIZE(golden_settings_fiji_a10));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_tonga_a11,
+ ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static u32 dce_v10_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v10_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v10_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
+ */
+static void dce_v10_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+ u32 tmp;
+
+ /* flip at hsync for async, default is vsync */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the primary scanout address */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v10_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v10_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v10_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v10_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v10_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v10_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* don't try to enable hpd on eDP or LVDS avoid breaking the
+ * aux dp channel on imac and help (but not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * also avoid interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_CONNECT_INT_DELAY,
+ AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_DISCONNECT_INT_DELAY,
+ AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
+ WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq,
+ amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v10_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq,
+ amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ u32 tmp;
+
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+}
+
+static int dce_v10_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ int num_crtc = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ num_crtc = 6;
+ break;
+ default:
+ num_crtc = 0;
+ }
+ return num_crtc;
+}
+
+void dce_v10_0_disable_dce(struct amdgpu_device *adev)
+{
+ /*Disable VGA render and enabled crtc, if has DCE engine*/
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v10_0_set_vga_render_state(adev, false);
+
+ /*Disable crtc*/
+ for (i = 0; i < dce_v10_0_get_num_crtc(adev); i++) {
+ crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
+ CRTC_CONTROL, CRTC_MASTER_EN);
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v10_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS/eDP FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ /* not needed for analog */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
+ return;
+
+ if (bpc == 0)
+ return;
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
+ }
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
+ }
+ break;
+ case 10:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
+ }
+ break;
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+
+/* display watermark setup */
+/**
+ * dce_v10_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce_v10_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode)
+{
+ u32 tmp, buffer_alloc, i, mem_cfg;
+ u32 pipe_offset = amdgpu_crtc->crtc_id;
+ /*
+ * Line Buffer Setup
+ * There are 6 line buffers, one for each display controllers.
+ * There are 3 partitions per LB. Select the number of partitions
+ * to enable based on the display width. For display widths larger
+ * than 4096, you need use to use 2 display controllers and combine
+ * them using the stereo blender.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (mode->crtc_hdisplay < 1920) {
+ mem_cfg = 1;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
+ mem_cfg = 2;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ } else {
+ DRM_DEBUG_KMS("Mode too big for LB!\n");
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ }
+ } else {
+ mem_cfg = 1;
+ buffer_alloc = 0;
+ }
+
+ tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
+ WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (mem_cfg) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 1:
+ return 1920 * 2;
+ case 2:
+ return 2560 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce10_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v10_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_dram_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v10_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_data_return_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_available_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+ u32 dram_bandwidth = dce_v10_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v10_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v10_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v10_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_average_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v10_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ if (dce_v10_0_average_bandwidth(wm) <=
+ (dce_v10_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
+{
+ if (dce_v10_0_average_bandwidth(wm) <=
+ (dce_v10_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_check_latency_hiding(struct dce10_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v10_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce10_wm_params wm_low, wm_high;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_high.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min(dce_v10_0_latency_watermark(&wm_high), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v10_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v10_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_low.num_heads = num_heads;
+
+ /* set for low clocks */
+ latency_watermark_b = min(dce_v10_0_latency_watermark(&wm_low), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v10_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v10_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* select wm B */
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* restore original selection */
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+ amdgpu_crtc->wm_high = latency_watermark_a;
+ amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/**
+ * dce_v10_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ mode = &adev->mode_info.crtcs[i]->base.mode;
+ lb_size = dce_v10_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
+ dce_v10_0_program_watermarks(adev, adev->mode_info.crtcs[i],
+ lb_size, num_heads);
+ }
+}
+
+static void dce_v10_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 offset, tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ offset = adev->mode_info.audio.pin[i].offset;
+ tmp = RREG32_AUDIO_ENDPT(offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (((tmp &
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+}
+
+static struct amdgpu_audio_pin *dce_v10_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v10_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ int interlace = 0;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ interlace = 1;
+ if (connector->latency_present[interlace]) {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, connector->video_latency[interlace]);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+ } else {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, 0);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, 0);
+ }
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ u8 *sadb = NULL;
+ int sad_count;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 0);
+ /* set HDMI mode */
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 1);
+ if (sad_count)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, sadb[0]);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, 5); /* stereo */
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+ BUG_ON(!sads);
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 tmp = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ MAX_CHANNELS, sad->channels);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ DESCRIPTOR_BYTE_2, sad->byte2);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES, sad->freq);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v10_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
+};
+
+static int dce_v10_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ adev->mode_info.audio.num_pins = 7;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+ adev->mode_info.audio.enabled = false;
+}
+
+/*
+ * update the N and CTS parameters for a given pixel clock rate
+ */
+static void dce_v10_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+ WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+ WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+ WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+ WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+ WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+ WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+
+}
+
+/*
+ * build a HDMI Video Info Frame
+ */
+static void dce_v10_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
+ void *buffer, size_t size)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint8_t *frame = buffer + 3;
+ uint8_t *header = buffer;
+
+ WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+ frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+ frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+ frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+ frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
+}
+
+static void dce_v10_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ u32 dto_phase = 24 * 1000;
+ u32 dto_modulo = clock;
+ u32 tmp;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* XXX two dtos; generally use dto0 for hdmi */
+ /* Express [24MHz / target pixel clock] as an exact rational
+ * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
+ * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+ */
+ tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
+ amdgpu_crtc->crtc_id);
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ struct hdmi_avi_infoframe frame;
+ ssize_t err;
+ u32 tmp;
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (!dig->afmt->enabled)
+ return;
+
+ /* hdmi deep color mode general control packets setup, if bpc > 8 */
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio prior to setting up hw */
+ dig->afmt->pin = dce_v10_0_audio_get_pin(adev);
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v10_0_audio_set_dto(encoder, mode->clock);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
+
+ WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
+
+ tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
+ switch (bpc) {
+ case 0:
+ case 6:
+ case 8:
+ case 16:
+ default:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+ connector->name, bpc);
+ break;
+ case 10:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
+ DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+ connector->name);
+ break;
+ case 12:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
+ DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+ connector->name);
+ break;
+ }
+ WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable audio info frames (frames won't be set until audio is enabled) */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ /* anything other than 0 */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
+
+ tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* set the default audio delay */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+ /* should be suffient for all audio modes and small enough for all hblanks */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* allow 60958 channel status fields to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+ if (bpc > 8)
+ /* clear SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
+ else
+ /* select SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
+ /* allow hw to sent ACR packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ dce_v10_0_afmt_update_ACR(encoder, mode->clock);
+
+ tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+ WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+ WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+ WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+ dce_v10_0_audio_write_speaker_allocation(encoder);
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
+ (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
+
+ dce_v10_0_afmt_audio_select_pin(encoder);
+ dce_v10_0_audio_write_sad_regs(encoder);
+ dce_v10_0_audio_write_latency_fields(encoder, mode);
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ dce_v10_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable AVI info frames */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* send audio packets */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
+ WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
+
+ /* enable audio after to setting up hw */
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE10 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] = {
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v10_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ if (enable)
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
+ else
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
+}
+
+static void dce_v10_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (enable)
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
+ else
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels;
+ u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
+ u32 pipe_config;
+ u32 tmp, viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_2D_TILED_THIN1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
+ tile_split);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
+ mtaspect);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
+ ADDR_SURF_MICRO_TILING_DISPLAY);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_1D_TILED_THIN1);
+ }
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
+ pipe_config);
+
+ dce_v10_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
+ if (bypass_lut)
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
+ WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v10_0_grph_enable(crtc, true);
+
+ WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v10_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v10_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ u32 tmp;
+
+ tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+ u32 tmp;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_OVL_MODE, 0);
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, PRESCALE_OVL_CONTROL, OVL_PRESCALE_BYPASS, 1);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, OVL_INPUT_GAMMA_MODE, 0);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, OVL_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, OVL_GAMUT_REMAP_MODE, 0);
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, OVL_REGAMMA_MODE, 0);
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_OVL_MODE, 0);
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
+ /* XXX this only needs to be programmed once per crtc at startup,
+ * not sure where the best place for it is
+ */
+ tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
+ WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v10_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return 1;
+ else
+ return 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return 3;
+ else
+ return 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return 5;
+ else
+ return 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v10_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ * Asic specific PLL information
+ *
+ * DCE 10.x
+ * Tonga
+ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
+ * CI
+ * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
+ *
+ */
+static u32 dce_v10_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else {
+ /* use the same PPLL for all DP monitors */
+ pll = amdgpu_pll_get_shared_dp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+
+ /* DCE10 has PPLL0, PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v10_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
+ else
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v10_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v10_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursor are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v10_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v10_0_lock_cursor(crtc, true);
+ ret = dce_v10_0_cursor_move_locked(crtc, x, y);
+ dce_v10_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v10_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v10_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v10_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v10_0_show_cursor(crtc);
+ dce_v10_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v10_0_lock_cursor(crtc, true);
+
+ dce_v10_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v10_0_show_cursor(crtc);
+
+ dce_v10_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v10_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
+ .cursor_set2 = dce_v10_0_crtc_cursor_set2,
+ .cursor_move = dce_v10_0_crtc_cursor_move,
+ .gamma_set = dce_v10_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v10_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ dce_v10_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ dce_v10_0_vga_enable(crtc, false);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v10_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled) {
+ dce_v10_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ dce_v10_0_vga_enable(crtc, false);
+ }
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v10_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v10_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v10_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll don't turn
+ * off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL0:
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v10_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v10_0_cursor_reset(crtc);
+ /* update the hw version fpr dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v10_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v10_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v10_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
+ .dpms = dce_v10_0_crtc_dpms,
+ .mode_fixup = dce_v10_0_crtc_mode_fixup,
+ .mode_set = dce_v10_0_crtc_mode_set,
+ .mode_set_base = dce_v10_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v10_0_crtc_set_base_atomic,
+ .prepare = dce_v10_0_crtc_prepare,
+ .commit = dce_v10_0_crtc_commit,
+ .disable = dce_v10_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v10_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = 128;
+ amdgpu_crtc->max_cursor_height = 128;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ switch (amdgpu_crtc->crtc_id) {
+ case 0:
+ default:
+ amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
+ break;
+ }
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
+
+ dce_v10_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v10_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 7;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ dce_v10_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v10_0_sw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v10_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v10_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v10_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int dce_v10_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ kfree(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v10_0_audio_fini(adev);
+
+ dce_v10_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v10_0_hw_init(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v10_0_init_golden_registers(adev);
+
+ /* disable vga render */
+ dce_v10_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v10_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v10_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v10_0_hw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v10_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v10_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v10_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v10_0_hw_fini(handle);
+}
+
+static int dce_v10_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v10_0_hw_init(handle);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v10_0_is_idle(void *handle)
+{
+ return true;
+}
+
+static int dce_v10_0_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static bool dce_v10_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return dce_v10_0_is_display_hung(adev);
+}
+
+static int dce_v10_0_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (dce_v10_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v10_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v10_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned hpd,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", hpd);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_set_crtc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v10_0_set_pageflip_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* IRQ could occur when in initial stage */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wakeup usersapce */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+static void dce_v10_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
+ WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static void dce_v10_0_crtc_vline_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
+ WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ dce_v10_0_crtc_vblank_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ dce_v10_0_crtc_vline_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ dce_v10_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
+ .name = "dce_v10_0",
+ .early_init = dce_v10_0_early_init,
+ .late_init = NULL,
+ .sw_init = dce_v10_0_sw_init,
+ .sw_fini = dce_v10_0_sw_fini,
+ .hw_init = dce_v10_0_hw_init,
+ .hw_fini = dce_v10_0_hw_fini,
+ .suspend = dce_v10_0_suspend,
+ .resume = dce_v10_0_resume,
+ .is_idle = dce_v10_0_is_idle,
+ .wait_for_idle = dce_v10_0_wait_for_idle,
+ .check_soft_reset = dce_v10_0_check_soft_reset,
+ .soft_reset = dce_v10_0_soft_reset,
+ .set_clockgating_state = dce_v10_0_set_clockgating_state,
+ .set_powergating_state = dce_v10_0_set_powergating_state,
+};
+
+static void
+dce_v10_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* set scaler clears this on some chips */
+ dce_v10_0_set_interleave(encoder->crtc, mode);
+
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+ dce_v10_0_afmt_enable(encoder, true);
+ dce_v10_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v10_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v10_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v10_0_program_fmt(encoder);
+}
+
+static void dce_v10_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v10_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+ dce_v10_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v10_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v10_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v10_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v10_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v10_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static const struct drm_encoder_helper_funcs dce_v10_0_ext_helper_funcs = {
+ .dpms = dce_v10_0_ext_dpms,
+ .prepare = dce_v10_0_ext_prepare,
+ .mode_set = dce_v10_0_ext_mode_set,
+ .commit = dce_v10_0_ext_commit,
+ .disable = dce_v10_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v10_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v10_0_encoder_prepare,
+ .mode_set = dce_v10_0_encoder_mode_set,
+ .commit = dce_v10_0_encoder_commit,
+ .disable = dce_v10_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v10_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v10_0_encoder_prepare,
+ .mode_set = dce_v10_0_encoder_mode_set,
+ .commit = dce_v10_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v10_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v10_0_encoder_funcs = {
+ .destroy = dce_v10_0_encoder_destroy,
+};
+
+static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v10_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v10_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v10_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
+ .bandwidth_update = &dce_v10_0_bandwidth_update,
+ .vblank_get_counter = &dce_v10_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v10_0_hpd_sense,
+ .hpd_set_polarity = &dce_v10_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v10_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v10_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v10_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v10_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v10_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = {
+ .set = dce_v10_0_set_crtc_irq_state,
+ .process = dce_v10_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_pageflip_irq_funcs = {
+ .set = dce_v10_0_set_pageflip_irq_state,
+ .process = dce_v10_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_hpd_irq_funcs = {
+ .set = dce_v10_0_set_hpd_irq_state,
+ .process = dce_v10_0_hpd_irq,
+};
+
+static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v10_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v10_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v10_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v10_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 10,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &dce_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
new file mode 100644
index 000000000..7a0747789
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V10_0_H__
+#define __DCE_V10_0_H__
+
+
+extern const struct amdgpu_ip_block_version dce_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v10_1_ip_block;
+
+void dce_v10_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
new file mode 100644
index 000000000..c14b70350
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -0,0 +1,3799 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "vid.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "dce_v11_0.h"
+
+#include "dce/dce_11_0_d.h"
+#include "dce/dce_11_0_sh_mask.h"
+#include "dce/dce_11_0_enum.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[] =
+{
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ CRTC6_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] =
+{
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ DIG0_REGISTER_OFFSET,
+ DIG1_REGISTER_OFFSET,
+ DIG2_REGISTER_OFFSET,
+ DIG3_REGISTER_OFFSET,
+ DIG4_REGISTER_OFFSET,
+ DIG5_REGISTER_OFFSET,
+ DIG6_REGISTER_OFFSET,
+ DIG7_REGISTER_OFFSET,
+ DIG8_REGISTER_OFFSET
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static const u32 cz_golden_settings_a11[] =
+{
+ mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
+ mmFBC_MISC, 0x1f311fff, 0x14300000,
+};
+
+static const u32 cz_mgcg_cgcg_init[] =
+{
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+};
+
+static const u32 stoney_golden_settings_a11[] =
+{
+ mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
+ mmFBC_MISC, 0x1f311fff, 0x14302000,
+};
+
+static const u32 polaris11_golden_settings_a11[] =
+{
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_DEBUG1, 0xffffffff, 0x00000008,
+ mmFBC_MISC, 0x9f313fff, 0x14302008,
+ mmHDMI_CONTROL, 0x313f031f, 0x00000011,
+};
+
+static const u32 polaris10_golden_settings_a11[] =
+{
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x9f313fff, 0x14302008,
+ mmHDMI_CONTROL, 0x313f031f, 0x00000011,
+};
+
+static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ amdgpu_device_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+ ARRAY_SIZE(cz_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ cz_golden_settings_a11,
+ ARRAY_SIZE(cz_golden_settings_a11));
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_program_register_sequence(adev,
+ stoney_golden_settings_a11,
+ ARRAY_SIZE(stoney_golden_settings_a11));
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_device_program_register_sequence(adev,
+ polaris11_golden_settings_a11,
+ ARRAY_SIZE(polaris11_golden_settings_a11));
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ polaris10_golden_settings_a11,
+ ARRAY_SIZE(polaris10_golden_settings_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v11_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
+ */
+static void dce_v11_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+ u32 tmp;
+
+ /* flip immediate for async, default is vsync */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the scanout addresses */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v11_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v11_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v11_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v11_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* don't try to enable hpd on eDP or LVDS avoid breaking the
+ * aux dp channel on imac and help (but not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * also avoid interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_CONNECT_INT_DELAY,
+ AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_DISCONNECT_INT_DELAY,
+ AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
+ WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v11_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ u32 tmp;
+
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+}
+
+static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
+{
+ int num_crtc = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ num_crtc = 3;
+ break;
+ case CHIP_STONEY:
+ num_crtc = 2;
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_VEGAM:
+ num_crtc = 6;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ num_crtc = 5;
+ break;
+ default:
+ num_crtc = 0;
+ }
+ return num_crtc;
+}
+
+void dce_v11_0_disable_dce(struct amdgpu_device *adev)
+{
+ /*Disable VGA render and enabled crtc, if has DCE engine*/
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v11_0_set_vga_render_state(adev, false);
+
+ /*Disable crtc*/
+ for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) {
+ crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
+ CRTC_CONTROL, CRTC_MASTER_EN);
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v11_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS/eDP FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ /* not needed for analog */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
+ return;
+
+ if (bpc == 0)
+ return;
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
+ }
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
+ }
+ break;
+ case 10:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
+ }
+ break;
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+
+/* display watermark setup */
+/**
+ * dce_v11_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode)
+{
+ u32 tmp, buffer_alloc, i, mem_cfg;
+ u32 pipe_offset = amdgpu_crtc->crtc_id;
+ /*
+ * Line Buffer Setup
+ * There are 6 line buffers, one for each display controllers.
+ * There are 3 partitions per LB. Select the number of partitions
+ * to enable based on the display width. For display widths larger
+ * than 4096, you need use to use 2 display controllers and combine
+ * them using the stereo blender.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (mode->crtc_hdisplay < 1920) {
+ mem_cfg = 1;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
+ mem_cfg = 2;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ } else {
+ DRM_DEBUG_KMS("Mode too big for LB!\n");
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ }
+ } else {
+ mem_cfg = 1;
+ buffer_alloc = 0;
+ }
+
+ tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
+ WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (mem_cfg) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 1:
+ return 1920 * 2;
+ case 2:
+ return 2560 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce10_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v11_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v11_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v11_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+ u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v11_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v11_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v11_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ if (dce_v11_0_average_bandwidth(wm) <=
+ (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v11_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
+{
+ if (dce_v11_0_average_bandwidth(wm) <=
+ (dce_v11_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v11_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v11_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v11_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce10_wm_params wm_low, wm_high;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_high.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min(dce_v11_0_latency_watermark(&wm_high), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v11_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_low.num_heads = num_heads;
+
+ /* set for low clocks */
+ latency_watermark_b = min(dce_v11_0_latency_watermark(&wm_low), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v11_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* select wm B */
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* restore original selection */
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+ amdgpu_crtc->wm_high = latency_watermark_a;
+ amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/**
+ * dce_v11_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ mode = &adev->mode_info.crtcs[i]->base.mode;
+ lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
+ dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i],
+ lb_size, num_heads);
+ }
+}
+
+static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 offset, tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ offset = adev->mode_info.audio.pin[i].offset;
+ tmp = RREG32_AUDIO_ENDPT(offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (((tmp &
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+}
+
+static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v11_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ int interlace = 0;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ interlace = 1;
+ if (connector->latency_present[interlace]) {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, connector->video_latency[interlace]);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+ } else {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, 0);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, 0);
+ }
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ u8 *sadb = NULL;
+ int sad_count;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 0);
+ /* set HDMI mode */
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 1);
+ if (sad_count)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, sadb[0]);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, 5); /* stereo */
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+ BUG_ON(!sads);
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 tmp = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ MAX_CHANNELS, sad->channels);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ DESCRIPTOR_BYTE_2, sad->byte2);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES, sad->freq);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v11_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[] =
+{
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
+ AUD7_REGISTER_OFFSET,
+};
+
+static int dce_v11_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ adev->mode_info.audio.num_pins = 7;
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_VEGAM:
+ adev->mode_info.audio.num_pins = 8;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ adev->mode_info.audio.num_pins = 6;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+ dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+ adev->mode_info.audio.enabled = false;
+}
+
+/*
+ * update the N and CTS parameters for a given pixel clock rate
+ */
+static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+ WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+ WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+ WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+ WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+ WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+ WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+
+}
+
+/*
+ * build a HDMI Video Info Frame
+ */
+static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
+ void *buffer, size_t size)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint8_t *frame = buffer + 3;
+ uint8_t *header = buffer;
+
+ WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+ frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+ frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+ frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+ frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
+}
+
+static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ u32 dto_phase = 24 * 1000;
+ u32 dto_modulo = clock;
+ u32 tmp;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* XXX two dtos; generally use dto0 for hdmi */
+ /* Express [24MHz / target pixel clock] as an exact rational
+ * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
+ * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+ */
+ tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
+ amdgpu_crtc->crtc_id);
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ struct hdmi_avi_infoframe frame;
+ ssize_t err;
+ u32 tmp;
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (!dig->afmt->enabled)
+ return;
+
+ /* hdmi deep color mode general control packets setup, if bpc > 8 */
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio prior to setting up hw */
+ dig->afmt->pin = dce_v11_0_audio_get_pin(adev);
+ dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v11_0_audio_set_dto(encoder, mode->clock);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
+
+ WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
+
+ tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
+ switch (bpc) {
+ case 0:
+ case 6:
+ case 8:
+ case 16:
+ default:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+ connector->name, bpc);
+ break;
+ case 10:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
+ DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+ connector->name);
+ break;
+ case 12:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
+ DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+ connector->name);
+ break;
+ }
+ WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable audio info frames (frames won't be set until audio is enabled) */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ /* anything other than 0 */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
+
+ tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* set the default audio delay */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+ /* should be suffient for all audio modes and small enough for all hblanks */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* allow 60958 channel status fields to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+ if (bpc > 8)
+ /* clear SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
+ else
+ /* select SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
+ /* allow hw to sent ACR packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ dce_v11_0_afmt_update_ACR(encoder, mode->clock);
+
+ tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+ WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+ WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+ WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+ dce_v11_0_audio_write_speaker_allocation(encoder);
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
+ (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
+
+ dce_v11_0_afmt_audio_select_pin(encoder);
+ dce_v11_0_audio_write_sad_regs(encoder);
+ dce_v11_0_audio_write_latency_fields(encoder, mode);
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable AVI info frames */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* send audio packets */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
+ WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
+
+ /* enable audio after to setting up hw */
+ dce_v11_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE11 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] =
+{
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ if (enable)
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
+ else
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
+}
+
+static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (enable)
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
+ else
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels;
+ u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
+ u32 pipe_config;
+ u32 tmp, viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_2D_TILED_THIN1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
+ tile_split);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
+ mtaspect);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
+ ADDR_SURF_MICRO_TILING_DISPLAY);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_1D_TILED_THIN1);
+ }
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
+ pipe_config);
+
+ dce_v11_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
+ if (bypass_lut)
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
+ WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v11_0_grph_enable(crtc, true);
+
+ WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v11_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v11_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ u32 tmp;
+
+ tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+ u32 tmp;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0);
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
+ /* XXX this only needs to be programmed once per crtc at startup,
+ * not sure where the best place for it is
+ */
+ tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
+ WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return 1;
+ else
+ return 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return 3;
+ else
+ return 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return 5;
+ else
+ return 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ * Asic specific PLL information
+ *
+ * DCE 10.x
+ * Tonga
+ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
+ * CI
+ * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
+ *
+ */
+static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if ((adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return ATOM_DP_DTO;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return ATOM_COMBOPHY_PLL1;
+ else
+ return ATOM_COMBOPHY_PLL0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return ATOM_COMBOPHY_PLL3;
+ else
+ return ATOM_COMBOPHY_PLL2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return ATOM_COMBOPHY_PLL5;
+ else
+ return ATOM_COMBOPHY_PLL4;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return ATOM_PPLL_INVALID;
+ }
+ }
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else {
+ /* use the same PPLL for all DP monitors */
+ pll = amdgpu_pll_get_shared_dp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+
+ /* XXX need to determine what plls are available on each DCE11 part */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (adev->flags & AMD_IS_APU) {
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ } else {
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ }
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
+ else
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursor are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v11_0_lock_cursor(crtc, true);
+ ret = dce_v11_0_cursor_move_locked(crtc, x, y);
+ dce_v11_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v11_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v11_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v11_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v11_0_show_cursor(crtc);
+ dce_v11_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v11_0_lock_cursor(crtc, true);
+
+ dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v11_0_show_cursor(crtc);
+
+ dce_v11_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v11_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
+ .cursor_set2 = dce_v11_0_crtc_cursor_set2,
+ .cursor_move = dce_v11_0_crtc_cursor_move,
+ .gamma_set = dce_v11_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v11_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ dce_v11_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ dce_v11_0_vga_enable(crtc, false);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v11_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled) {
+ dce_v11_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ dce_v11_0_vga_enable(crtc, false);
+ }
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v11_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v11_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll don't turn
+ * off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL0:
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ case ATOM_COMBOPHY_PLL0:
+ case ATOM_COMBOPHY_PLL1:
+ case ATOM_COMBOPHY_PLL2:
+ case ATOM_COMBOPHY_PLL3:
+ case ATOM_COMBOPHY_PLL4:
+ case ATOM_COMBOPHY_PLL5:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ if ((adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ int encoder_mode =
+ amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
+
+ /* SetPixelClock calculates the plls and ss values now */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id,
+ amdgpu_crtc->pll_id,
+ encoder_mode, amdgpu_encoder->encoder_id,
+ adjusted_mode->clock, 0, 0, 0, 0,
+ amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
+ } else {
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ }
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v11_0_cursor_reset(crtc);
+ /* update the hw version fpr dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
+ .dpms = dce_v11_0_crtc_dpms,
+ .mode_fixup = dce_v11_0_crtc_mode_fixup,
+ .mode_set = dce_v11_0_crtc_mode_set,
+ .mode_set_base = dce_v11_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic,
+ .prepare = dce_v11_0_crtc_prepare,
+ .commit = dce_v11_0_crtc_commit,
+ .disable = dce_v11_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = 128;
+ amdgpu_crtc->max_cursor_height = 128;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ switch (amdgpu_crtc->crtc_id) {
+ case 0:
+ default:
+ amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
+ break;
+ }
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v11_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
+
+ dce_v11_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 9;
+ break;
+ case CHIP_STONEY:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 9;
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_VEGAM:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ adev->mode_info.num_hpd = 5;
+ adev->mode_info.num_dig = 5;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ dce_v11_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v11_0_sw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v11_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v11_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v11_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int dce_v11_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ kfree(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v11_0_audio_fini(adev);
+
+ dce_v11_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v11_0_hw_init(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v11_0_init_golden_registers(adev);
+
+ /* disable vga render */
+ dce_v11_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_crtc_powergate_init(adev);
+ amdgpu_atombios_encoder_init_dig(adev);
+ if ((adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
+ amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
+ DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
+ amdgpu_atombios_crtc_set_dce_clock(adev, 0,
+ DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS);
+ } else {
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+ }
+
+ /* initialize hpd */
+ dce_v11_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v11_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v11_0_hw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v11_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v11_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v11_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v11_0_hw_fini(handle);
+}
+
+static int dce_v11_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v11_0_hw_init(handle);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v11_0_is_idle(void *handle)
+{
+ return true;
+}
+
+static int dce_v11_0_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int dce_v11_0_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (dce_v11_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned hpd,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", hpd);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* IRQ could occur when in initial stage */
+ if(amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wakeup usersapce */
+ if(works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
+ WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
+ WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ dce_v11_0_crtc_vblank_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ dce_v11_0_crtc_vline_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ dce_v11_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+}
+
+static int dce_v11_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v11_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
+ .name = "dce_v11_0",
+ .early_init = dce_v11_0_early_init,
+ .late_init = NULL,
+ .sw_init = dce_v11_0_sw_init,
+ .sw_fini = dce_v11_0_sw_fini,
+ .hw_init = dce_v11_0_hw_init,
+ .hw_fini = dce_v11_0_hw_fini,
+ .suspend = dce_v11_0_suspend,
+ .resume = dce_v11_0_resume,
+ .is_idle = dce_v11_0_is_idle,
+ .wait_for_idle = dce_v11_0_wait_for_idle,
+ .soft_reset = dce_v11_0_soft_reset,
+ .set_clockgating_state = dce_v11_0_set_clockgating_state,
+ .set_powergating_state = dce_v11_0_set_powergating_state,
+};
+
+static void
+dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* set scaler clears this on some chips */
+ dce_v11_0_set_interleave(encoder->crtc, mode);
+
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+ dce_v11_0_afmt_enable(encoder, true);
+ dce_v11_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v11_0_program_fmt(encoder);
+}
+
+static void dce_v11_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v11_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+ dce_v11_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v11_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v11_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v11_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v11_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
+ .dpms = dce_v11_0_ext_dpms,
+ .prepare = dce_v11_0_ext_prepare,
+ .mode_set = dce_v11_0_ext_mode_set,
+ .commit = dce_v11_0_ext_commit,
+ .disable = dce_v11_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v11_0_encoder_prepare,
+ .mode_set = dce_v11_0_encoder_mode_set,
+ .commit = dce_v11_0_encoder_commit,
+ .disable = dce_v11_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v11_0_encoder_prepare,
+ .mode_set = dce_v11_0_encoder_mode_set,
+ .commit = dce_v11_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = {
+ .destroy = dce_v11_0_encoder_destroy,
+};
+
+static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 3:
+ encoder->possible_crtcs = 0x7;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 5:
+ encoder->possible_crtcs = 0x1f;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
+ .bandwidth_update = &dce_v11_0_bandwidth_update,
+ .vblank_get_counter = &dce_v11_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v11_0_hpd_sense,
+ .hpd_set_polarity = &dce_v11_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v11_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v11_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v11_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
+ .set = dce_v11_0_set_crtc_irq_state,
+ .process = dce_v11_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = {
+ .set = dce_v11_0_set_pageflip_irq_state,
+ .process = dce_v11_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = {
+ .set = dce_v11_0_set_hpd_irq_state,
+ .process = dce_v11_0_hpd_irq,
+};
+
+static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v11_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v11_2_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 11,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &dce_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
new file mode 100644
index 000000000..0d878ca3a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V11_0_H__
+#define __DCE_V11_0_H__
+
+extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;
+
+void dce_v11_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
new file mode 100644
index 000000000..7f85ba5b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -0,0 +1,3484 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+#include "dce_v6_0.h"
+#include "si_enums.h"
+
+static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[6] =
+{
+ SI_CRTC0_REGISTER_OFFSET,
+ SI_CRTC1_REGISTER_OFFSET,
+ SI_CRTC2_REGISTER_OFFSET,
+ SI_CRTC3_REGISTER_OFFSET,
+ SI_CRTC4_REGISTER_OFFSET,
+ SI_CRTC5_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] =
+{
+ mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
+};
+
+static const uint32_t dig_offsets[] = {
+ SI_CRTC0_REGISTER_OFFSET,
+ SI_CRTC1_REGISTER_OFFSET,
+ SI_CRTC2_REGISTER_OFFSET,
+ SI_CRTC3_REGISTER_OFFSET,
+ SI_CRTC4_REGISTER_OFFSET,
+ SI_CRTC5_REGISTER_OFFSET,
+ (0x13830 - 0x7030) >> 2,
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[6] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
+ reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v6_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v6_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v6_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Does the actual pageflip (evergreen+).
+ * During vblank we take the crtc lock and wait for the update_pending
+ * bit to go high, when it does, we release the lock, and allow the
+ * double buffered update to take place.
+ * Returns the current update pending status.
+ */
+static void dce_v6_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+
+ /* flip at hsync for async, default is vsync */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the scanout addresses */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)crtc_base);
+
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+
+}
+
+/**
+ * dce_v6_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v6_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v6_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ else
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v6_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp |= DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* don't try to enable hpd on eDP or LVDS avoid breaking the
+ * aux dp channel on imac and help (but not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * also avoid interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v6_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ if (!render)
+ WREG32(mmVGA_RENDER_CONTROL,
+ RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
+
+}
+
+static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ return 6;
+ case CHIP_OLAND:
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+void dce_v6_0_disable_dce(struct amdgpu_device *adev)
+{
+ /*Disable VGA render and enabled crtc, if has DCE engine*/
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v6_0_set_vga_render_state(adev, false);
+
+ /*Disable crtc*/
+ for (i = 0; i < dce_v6_0_get_num_crtc(adev); i++) {
+ crtc_enabled = RREG32(mmCRTC_CONTROL + crtc_offsets[i]) &
+ CRTC_CONTROL__CRTC_MASTER_EN_MASK;
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp &= ~CRTC_CONTROL__CRTC_MASTER_EN_MASK;
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
+{
+
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ if (bpc == 0)
+ return;
+
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK);
+ else
+ tmp |= FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK;
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH_MASK);
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH_MASK);
+ break;
+ case 10:
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+/**
+ * si_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 si_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce6_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v6_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_dram_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v6_0_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_data_return_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_dmif_request_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_available_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+ u32 dram_bandwidth = dce_v6_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v6_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v6_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v6_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_average_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v6_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+ if (dce_v6_0_average_bandwidth(wm) <=
+ (dce_v6_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
+{
+ if (dce_v6_0_average_bandwidth(wm) <=
+ (dce_v6_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_check_latency_hiding(struct dce6_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v6_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce6_wm_params wm_low, wm_high;
+ u32 dram_channels;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 priority_a_mark = 0, priority_b_mark = 0;
+ u32 priority_a_cnt = PRIORITY_OFF;
+ u32 priority_b_cnt = PRIORITY_OFF;
+ u32 tmp, arb_control3, lb_vblank_lead_lines = 0;
+ fixed20_12 a, b, c;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
+ priority_a_cnt = 0;
+ priority_b_cnt = 0;
+
+ dram_channels = si_get_number_of_dram_channels(adev);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = dram_channels;
+ wm_high.num_heads = num_heads;
+
+ if (adev->pm.dpm_enabled) {
+ /* watermark for low clocks */
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = dram_channels;
+ wm_low.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min(dce_v6_0_latency_watermark(&wm_high), (u32)65535);
+ /* set for low clocks */
+ latency_watermark_b = min(dce_v6_0_latency_watermark(&wm_low), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v6_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v6_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ priority_a_cnt |= PRIORITY_ALWAYS_ON;
+ priority_b_cnt |= PRIORITY_ALWAYS_ON;
+ }
+ if (!dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v6_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v6_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ priority_a_cnt |= PRIORITY_ALWAYS_ON;
+ priority_b_cnt |= PRIORITY_ALWAYS_ON;
+ }
+
+ a.full = dfixed_const(1000);
+ b.full = dfixed_const(mode->clock);
+ b.full = dfixed_div(b, a);
+ c.full = dfixed_const(latency_watermark_a);
+ c.full = dfixed_mul(c, b);
+ c.full = dfixed_mul(c, amdgpu_crtc->hsc);
+ c.full = dfixed_div(c, a);
+ a.full = dfixed_const(16);
+ c.full = dfixed_div(c, a);
+ priority_a_mark = dfixed_trunc(c);
+ priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
+
+ a.full = dfixed_const(1000);
+ b.full = dfixed_const(mode->clock);
+ b.full = dfixed_div(b, a);
+ c.full = dfixed_const(latency_watermark_b);
+ c.full = dfixed_mul(c, b);
+ c.full = dfixed_mul(c, amdgpu_crtc->hsc);
+ c.full = dfixed_div(c, a);
+ a.full = dfixed_const(16);
+ c.full = dfixed_div(c, a);
+ priority_b_mark = dfixed_trunc(c);
+ priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
+ tmp = arb_control3;
+ tmp &= ~LATENCY_WATERMARK_MASK(3);
+ tmp |= LATENCY_WATERMARK_MASK(1);
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* select wm B */
+ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
+ tmp &= ~LATENCY_WATERMARK_MASK(3);
+ tmp |= LATENCY_WATERMARK_MASK(2);
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* restore original selection */
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, arb_control3);
+
+ /* write the priority marks */
+ WREG32(mmPRIORITY_A_CNT + amdgpu_crtc->crtc_offset, priority_a_cnt);
+ WREG32(mmPRIORITY_B_CNT + amdgpu_crtc->crtc_offset, priority_b_cnt);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+ amdgpu_crtc->wm_high = latency_watermark_a;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/* watermark setup */
+static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *other_mode)
+{
+ u32 tmp, buffer_alloc, i;
+ u32 pipe_offset = amdgpu_crtc->crtc_id * 0x8;
+ /*
+ * Line Buffer Setup
+ * There are 3 line buffers, each one shared by 2 display controllers.
+ * mmDC_LB_MEMORY_SPLIT controls how that line buffer is shared between
+ * the display controllers. The paritioning is done via one of four
+ * preset allocations specified in bits 21:20:
+ * 0 - half lb
+ * 2 - whole lb, other crtc must be disabled
+ */
+ /* this can get tricky if we have two large displays on a paired group
+ * of crtcs. Ideally for multiple large displays we'd assign them to
+ * non-linked crtcs for maximum line buffer allocation.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (other_mode) {
+ tmp = 0; /* 1/2 */
+ buffer_alloc = 1;
+ } else {
+ tmp = 2; /* whole */
+ buffer_alloc = 2;
+ }
+ } else {
+ tmp = 0;
+ buffer_alloc = 0;
+ }
+
+ WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
+ DC_LB_MEMORY_CONFIG(tmp));
+
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+ (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+ PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATION_COMPLETED_MASK)
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (tmp) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 2:
+ return 8192 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+
+/**
+ * dce_v6_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode0 = NULL;
+ struct drm_display_mode *mode1 = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ if (!adev->mode_info.mode_config_initialized)
+ return;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i += 2) {
+ mode0 = &adev->mode_info.crtcs[i]->base.mode;
+ mode1 = &adev->mode_info.crtcs[i+1]->base.mode;
+ lb_size = dce_v6_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode0, mode1);
+ dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i], lb_size, num_heads);
+ lb_size = dce_v6_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i+1], mode1, mode0);
+ dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads);
+ }
+}
+
+static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT,
+ PORT_CONNECTIVITY))
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+
+}
+
+static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v6_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset,
+ REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
+ dig->afmt->pin->id));
+}
+
+static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ int interlace = 0;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ interlace = 1;
+
+ if (connector->latency_present[interlace]) {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, connector->video_latency[interlace]);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+ } else {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, 0);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, 0);
+ }
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u8 *sadb = NULL;
+ int sad_count;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 0);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 0);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 1);
+
+ if (sad_count)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, sadb[0]);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, 5); /* stereo */
+
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 tmp = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ MAX_CHANNELS, sad->channels);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ DESCRIPTOR_BYTE_2, sad->byte2);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES, sad->freq);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ }
+
+ kfree(sads);
+
+}
+
+static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[7] =
+{
+ (0x1780 - 0x1780),
+ (0x1786 - 0x1780),
+ (0x178c - 0x1780),
+ (0x1792 - 0x1780),
+ (0x1798 - 0x1780),
+ (0x179d - 0x1780),
+ (0x17a4 - 0x1780),
+};
+
+static int dce_v6_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ default:
+ adev->mode_info.audio.num_pins = 6;
+ break;
+ case CHIP_OLAND:
+ adev->mode_info.audio.num_pins = 2;
+ break;
+ }
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+ adev->mode_info.audio.enabled = false;
+}
+
+static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1);
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder,
+ uint32_t clock, int bpc)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE,
+ bpc > 8 ? 0 : 1);
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+ WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+ WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+ WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+ WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+ WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+ WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct hdmi_avi_infoframe frame;
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ uint8_t *payload = buffer + 3;
+ uint8_t *header = buffer;
+ ssize_t err;
+ u32 tmp;
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+ payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+ payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+ payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+ payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ /* anything other than 0 */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1,
+ HDMI_AUDIO_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ u32 tmp;
+
+ /*
+ * Two dtos: generally use dto0 for hdmi, dto1 for dp.
+ * Express [24MHz / target pixel clock] as an exact rational
+ * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
+ * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+ */
+ tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO_SEL, 0);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO_SEL, 1);
+ }
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000);
+ WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock);
+ }
+}
+
+static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+ WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+ WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+ WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_GC + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0);
+ WREG32(mmHDMI_GC + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+ } else {
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+ }
+}
+
+static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1);
+ WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+ WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp);
+ } else {
+ WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0);
+ }
+}
+
+static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (!dig->afmt->enabled)
+ return;
+
+ dig->afmt->pin = dce_v6_0_audio_get_pin(adev);
+ if (!dig->afmt->pin)
+ return;
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio before setting up hw */
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v6_0_audio_set_mute(encoder, true);
+ dce_v6_0_audio_write_speaker_allocation(encoder);
+ dce_v6_0_audio_write_sad_regs(encoder);
+ dce_v6_0_audio_write_latency_fields(encoder, mode);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ dce_v6_0_audio_set_dto(encoder, mode->clock);
+ dce_v6_0_audio_set_vbi_packet(encoder);
+ dce_v6_0_audio_set_acr(encoder, mode->clock, bpc);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10);
+ }
+ dce_v6_0_audio_set_packet(encoder);
+ dce_v6_0_audio_select_pin(encoder);
+ dce_v6_0_audio_set_avi_infoframe(encoder, mode);
+ dce_v6_0_audio_set_mute(encoder, false);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ dce_v6_0_audio_hdmi_enable(encoder, 1);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_audio_dp_enable(encoder, 1);
+ }
+
+ /* enable audio after setting up hw */
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v6_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE6 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ DRM_ERROR("Out of memory allocating afmt table\n");
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v6_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] =
+{
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v6_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | (enable ? 1 : 0));
+}
+
+static void dce_v6_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, enable ? 1 : 0);
+}
+
+static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels, pipe_config;
+ u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
+ u32 viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_INDEXED));
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB565));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
+#ifdef __BIG_ENDIAN
+ fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
+ GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+#ifdef __BIG_ENDIAN
+ fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format |= GRPH_NUM_BANKS(num_banks);
+ fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
+ fb_format |= GRPH_TILE_SPLIT(tile_split);
+ fb_format |= GRPH_BANK_WIDTH(bankw);
+ fb_format |= GRPH_BANK_HEIGHT(bankh);
+ fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
+ }
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ fb_format |= GRPH_PIPE_CONFIG(pipe_config);
+
+ dce_v6_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ WREG32_P(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset,
+ (bypass_lut ? GRPH_LUT_10BIT_BYPASS__GRPH_LUT_10BIT_BYPASS_EN_MASK : 0),
+ ~GRPH_LUT_10BIT_BYPASS__GRPH_LUT_10BIT_BYPASS_EN_MASK);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v6_0_grph_enable(crtc, true);
+
+ WREG32(mmDESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v6_0_bandwidth_update(adev);
+
+ return 0;
+
+}
+
+static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
+ INTERLEAVE_EN);
+ else
+ WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
+}
+
+static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ ICON_DEGAMMA_MODE(0) |
+ (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
+
+
+}
+
+static int dce_v6_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ return dig->linkb ? 1 : 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ return dig->linkb ? 3 : 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ return dig->linkb ? 5 : 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v6_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ *
+ */
+static u32 dce_v6_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else
+ return ATOM_PPLL0;
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+
+ /* PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v6_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock |= CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ else
+ cur_lock &= ~CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+
+
+}
+
+static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+
+}
+
+static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ int w = amdgpu_crtc->cursor_width;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursor are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((w - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v6_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v6_0_lock_cursor(crtc, true);
+ ret = dce_v6_0_cursor_move_locked(crtc, x, y);
+ dce_v6_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v6_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v6_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v6_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v6_0_show_cursor(crtc);
+ dce_v6_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v6_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v6_0_lock_cursor(crtc, true);
+
+ dce_v6_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v6_0_show_cursor(crtc);
+ dce_v6_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v6_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v6_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = {
+ .cursor_set2 = dce_v6_0_crtc_cursor_set2,
+ .cursor_move = dce_v6_0_crtc_cursor_move,
+ .gamma_set = dce_v6_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v6_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v6_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled)
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v6_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v6_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
+{
+
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v6_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll don't turn
+ * off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v6_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v6_0_cursor_reset(crtc);
+ /* update the hw version fpr dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v6_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v6_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v6_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
+ .dpms = dce_v6_0_crtc_dpms,
+ .mode_fixup = dce_v6_0_crtc_mode_fixup,
+ .mode_set = dce_v6_0_crtc_mode_set,
+ .mode_set_base = dce_v6_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v6_0_crtc_set_base_atomic,
+ .prepare = dce_v6_0_crtc_prepare,
+ .commit = dce_v6_0_crtc_commit,
+ .disable = dce_v6_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v6_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = CURSOR_WIDTH;
+ amdgpu_crtc->max_cursor_height = CURSOR_HEIGHT;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id];
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
+
+ dce_v6_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v6_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6;
+ break;
+ case CHIP_OLAND:
+ adev->mode_info.num_hpd = 2;
+ adev->mode_info.num_dig = 2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dce_v6_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v6_0_sw_init(void *handle)
+{
+ int r, i;
+ bool ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 8; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev->mode_info.mode_config_initialized = true;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v6_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
+ if (ret)
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v6_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v6_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ return r;
+}
+
+static int dce_v6_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ kfree(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v6_0_audio_fini(adev);
+ dce_v6_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v6_0_hw_init(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* disable vga render */
+ dce_v6_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v6_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v6_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v6_0_hw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v6_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v6_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v6_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v6_0_hw_fini(handle);
+}
+
+static int dce_v6_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v6_0_hw_init(handle);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v6_0_is_idle(void *handle)
+{
+ return true;
+}
+
+static int dce_v6_0_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int dce_v6_0_soft_reset(void *handle)
+{
+ DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n");
+ return 0;
+}
+
+static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = SI_CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = SI_CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = SI_CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = SI_CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = SI_CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = SI_CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ interrupt_mask = RREG32(mmINT_MASK + reg_block);
+ interrupt_mask &= ~VBLANK_INT_MASK;
+ WREG32(mmINT_MASK + reg_block, interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ interrupt_mask = RREG32(mmINT_MASK + reg_block);
+ interrupt_mask |= VBLANK_INT_MASK;
+ WREG32(mmINT_MASK + reg_block, interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+
+}
+
+static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 dc_hpd_int_cntl;
+
+ if (type >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", type);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl |= DC_HPDx_INT_EN;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* IRQ could occur when in initial stage */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wakeup usersapce */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask, tmp;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+
+}
+
+static int dce_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
+ .name = "dce_v6_0",
+ .early_init = dce_v6_0_early_init,
+ .late_init = NULL,
+ .sw_init = dce_v6_0_sw_init,
+ .sw_fini = dce_v6_0_sw_fini,
+ .hw_init = dce_v6_0_hw_init,
+ .hw_fini = dce_v6_0_hw_fini,
+ .suspend = dce_v6_0_suspend,
+ .resume = dce_v6_0_resume,
+ .is_idle = dce_v6_0_is_idle,
+ .wait_for_idle = dce_v6_0_wait_for_idle,
+ .soft_reset = dce_v6_0_soft_reset,
+ .set_clockgating_state = dce_v6_0_set_clockgating_state,
+ .set_powergating_state = dce_v6_0_set_powergating_state,
+};
+
+static void
+dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* set scaler clears this on some chips */
+ dce_v6_0_set_interleave(encoder->crtc, mode);
+
+ if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_afmt_enable(encoder, true);
+ dce_v6_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
+{
+
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v6_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v6_0_program_fmt(encoder);
+}
+
+static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
+{
+
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
+{
+
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em))
+ dce_v6_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v6_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static bool dce_v6_0_ext_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static const struct drm_encoder_helper_funcs dce_v6_0_ext_helper_funcs = {
+ .dpms = dce_v6_0_ext_dpms,
+ .mode_fixup = dce_v6_0_ext_mode_fixup,
+ .prepare = dce_v6_0_ext_prepare,
+ .mode_set = dce_v6_0_ext_mode_set,
+ .commit = dce_v6_0_ext_commit,
+ .disable = dce_v6_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v6_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v6_0_encoder_prepare,
+ .mode_set = dce_v6_0_encoder_mode_set,
+ .commit = dce_v6_0_encoder_commit,
+ .disable = dce_v6_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v6_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v6_0_encoder_prepare,
+ .mode_set = dce_v6_0_encoder_mode_set,
+ .commit = dce_v6_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v6_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v6_0_encoder_funcs = {
+ .destroy = dce_v6_0_encoder_destroy,
+};
+
+static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v6_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v6_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v6_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
+ .bandwidth_update = &dce_v6_0_bandwidth_update,
+ .vblank_get_counter = &dce_v6_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v6_0_hpd_sense,
+ .hpd_set_polarity = &dce_v6_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v6_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v6_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v6_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v6_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v6_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
+ .set = dce_v6_0_set_crtc_interrupt_state,
+ .process = dce_v6_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
+ .set = dce_v6_0_set_pageflip_interrupt_state,
+ .process = dce_v6_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
+ .set = dce_v6_0_set_hpd_interrupt_state,
+ .process = dce_v6_0_hpd_irq,
+};
+
+static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v6_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v6_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v6_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v6_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v6_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v6_4_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 6,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &dce_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
new file mode 100644
index 000000000..7b546b596
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V6_0_H__
+#define __DCE_V6_0_H__
+
+extern const struct amdgpu_ip_block_version dce_v6_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v6_4_ip_block;
+
+void dce_v6_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
new file mode 100644
index 000000000..f2b3cb5ed
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -0,0 +1,3581 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "cikd.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "dce_v8_0.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "gca/gfx_7_2_enum.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[6] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] = {
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ (0x13830 - 0x7030) >> 2,
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[6] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static u32 dce_v8_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v8_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v8_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
+ */
+static void dce_v8_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+
+ /* flip at hsync for async, default is vsync */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the primary scanout addresses */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v8_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v8_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v8_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v8_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ else
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v8_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp |= DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* don't try to enable hpd on eDP or LVDS avoid breaking the
+ * aux dp channel on imac and help (but not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * also avoid interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v8_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ u32 tmp;
+
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+}
+
+static int dce_v8_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ int num_crtc = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ num_crtc = 6;
+ break;
+ case CHIP_KAVERI:
+ num_crtc = 4;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ num_crtc = 2;
+ break;
+ default:
+ num_crtc = 0;
+ }
+ return num_crtc;
+}
+
+void dce_v8_0_disable_dce(struct amdgpu_device *adev)
+{
+ /*Disable VGA render and enabled crtc, if has DCE engine*/
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v8_0_set_vga_render_state(adev, false);
+
+ /*Disable crtc*/
+ for (i = 0; i < dce_v8_0_get_num_crtc(adev); i++) {
+ crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
+ CRTC_CONTROL, CRTC_MASTER_EN);
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v8_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS/eDP FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ /* not needed for analog */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
+ return;
+
+ if (bpc == 0)
+ return;
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (0 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (0 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (1 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (1 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ case 10:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (2 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (2 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+
+/* display watermark setup */
+/**
+ * dce_v8_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce_v8_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode)
+{
+ u32 tmp, buffer_alloc, i;
+ u32 pipe_offset = amdgpu_crtc->crtc_id * 0x8;
+ /*
+ * Line Buffer Setup
+ * There are 6 line buffers, one for each display controllers.
+ * There are 3 partitions per LB. Select the number of partitions
+ * to enable based on the display width. For display widths larger
+ * than 4096, you need use to use 2 display controllers and combine
+ * them using the stereo blender.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (mode->crtc_hdisplay < 1920) {
+ tmp = 1;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
+ tmp = 2;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
+ tmp = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ } else {
+ DRM_DEBUG_KMS("Mode too big for LB!\n");
+ tmp = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ }
+ } else {
+ tmp = 1;
+ buffer_alloc = 0;
+ }
+
+ WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset,
+ (tmp << LB_MEMORY_CTRL__LB_MEMORY_CONFIG__SHIFT) |
+ (0x6B0 << LB_MEMORY_CTRL__LB_MEMORY_SIZE__SHIFT));
+
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+ (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+ PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATION_COMPLETED_MASK)
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (tmp) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 1:
+ return 1920 * 2;
+ case 2:
+ return 2560 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce8_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v8_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_dram_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v8_0_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_data_return_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_dmif_request_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_available_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+ u32 dram_bandwidth = dce_v8_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v8_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v8_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v8_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_average_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v8_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+ if (dce_v8_0_average_bandwidth(wm) <=
+ (dce_v8_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
+{
+ if (dce_v8_0_average_bandwidth(wm) <=
+ (dce_v8_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_check_latency_hiding(struct dce8_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v8_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce8_wm_params wm_low, wm_high;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min(line_time, (u32)65535);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_high.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min(dce_v8_0_latency_watermark(&wm_high), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v8_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v8_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_low.num_heads = num_heads;
+
+ /* set for low clocks */
+ latency_watermark_b = min(dce_v8_0_latency_watermark(&wm_low), (u32)65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v8_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v8_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = wm_mask;
+ tmp &= ~(3 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* select wm B */
+ tmp = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp &= ~(3 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* restore original selection */
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+ amdgpu_crtc->wm_high = latency_watermark_a;
+ amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/**
+ * dce_v8_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ mode = &adev->mode_info.crtcs[i]->base.mode;
+ lb_size = dce_v8_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
+ dce_v8_0_program_watermarks(adev, adev->mode_info.crtcs[i],
+ lb_size, num_heads);
+ }
+}
+
+static void dce_v8_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 offset, tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ offset = adev->mode_info.audio.pin[i].offset;
+ tmp = RREG32_AUDIO_ENDPT(offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (((tmp &
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+}
+
+static struct amdgpu_audio_pin *dce_v8_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v8_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->offset;
+
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + offset,
+ (dig->afmt->pin->id << AFMT_AUDIO_SRC_CONTROL__AFMT_AUDIO_SRC_SELECT__SHIFT));
+}
+
+static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp = 0, offset;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+ if (connector->latency_present[1])
+ tmp =
+ (connector->video_latency[1] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (connector->audio_latency[1] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ else
+ tmp =
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ } else {
+ if (connector->latency_present[0])
+ tmp =
+ (connector->video_latency[0] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (connector->audio_latency[0] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ else
+ tmp =
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+
+ }
+ WREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 offset, tmp;
+ u8 *sadb = NULL;
+ int sad_count;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp &= ~(AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__DP_CONNECTION_MASK |
+ AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION_MASK);
+ /* set HDMI mode */
+ tmp |= AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__HDMI_CONNECTION_MASK;
+ if (sad_count)
+ tmp |= (sadb[0] << AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION__SHIFT);
+ else
+ tmp |= (5 << AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION__SHIFT); /* stereo */
+ WREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+ BUG_ON(!sads);
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 value = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[7] = {
+ (0x1780 - 0x1780),
+ (0x1786 - 0x1780),
+ (0x178c - 0x1780),
+ (0x1792 - 0x1780),
+ (0x1798 - 0x1780),
+ (0x179d - 0x1780),
+ (0x17a4 - 0x1780),
+};
+
+static int dce_v8_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ if (adev->asic_type == CHIP_KAVERI) /* KV: 4 streams, 7 endpoints */
+ adev->mode_info.audio.num_pins = 7;
+ else if ((adev->asic_type == CHIP_KABINI) ||
+ (adev->asic_type == CHIP_MULLINS)) /* KB/ML: 2 streams, 3 endpoints */
+ adev->mode_info.audio.num_pins = 3;
+ else if ((adev->asic_type == CHIP_BONAIRE) ||
+ (adev->asic_type == CHIP_HAWAII))/* BN/HW: 6 streams, 7 endpoints */
+ adev->mode_info.audio.num_pins = 7;
+ else
+ adev->mode_info.audio.num_pins = 3;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+ adev->mode_info.audio.enabled = false;
+}
+
+/*
+ * update the N and CTS parameters for a given pixel clock rate
+ */
+static void dce_v8_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint32_t offset = dig->afmt->offset;
+
+ WREG32(mmHDMI_ACR_32_0 + offset, (acr.cts_32khz << HDMI_ACR_32_0__HDMI_ACR_CTS_32__SHIFT));
+ WREG32(mmHDMI_ACR_32_1 + offset, acr.n_32khz);
+
+ WREG32(mmHDMI_ACR_44_0 + offset, (acr.cts_44_1khz << HDMI_ACR_44_0__HDMI_ACR_CTS_44__SHIFT));
+ WREG32(mmHDMI_ACR_44_1 + offset, acr.n_44_1khz);
+
+ WREG32(mmHDMI_ACR_48_0 + offset, (acr.cts_48khz << HDMI_ACR_48_0__HDMI_ACR_CTS_48__SHIFT));
+ WREG32(mmHDMI_ACR_48_1 + offset, acr.n_48khz);
+}
+
+/*
+ * build a HDMI Video Info Frame
+ */
+static void dce_v8_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
+ void *buffer, size_t size)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint32_t offset = dig->afmt->offset;
+ uint8_t *frame = buffer + 3;
+ uint8_t *header = buffer;
+
+ WREG32(mmAFMT_AVI_INFO0 + offset,
+ frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + offset,
+ frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + offset,
+ frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + offset,
+ frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
+}
+
+static void dce_v8_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ u32 dto_phase = 24 * 1000;
+ u32 dto_modulo = clock;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* XXX two dtos; generally use dto0 for hdmi */
+ /* Express [24MHz / target pixel clock] as an exact rational
+ * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
+ * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+ */
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, (amdgpu_crtc->crtc_id << DCCG_AUDIO_DTO_SOURCE__DCCG_AUDIO_DTO0_SOURCE_SEL__SHIFT));
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ struct hdmi_avi_infoframe frame;
+ uint32_t offset, val;
+ ssize_t err;
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (!dig->afmt->enabled)
+ return;
+
+ offset = dig->afmt->offset;
+
+ /* hdmi deep color mode general control packets setup, if bpc > 8 */
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio prior to setting up hw */
+ dig->afmt->pin = dce_v8_0_audio_get_pin(adev);
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v8_0_audio_set_dto(encoder, mode->clock);
+
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + offset,
+ HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK); /* send null packets when required */
+
+ WREG32(mmAFMT_AUDIO_CRC_CONTROL + offset, 0x1000);
+
+ val = RREG32(mmHDMI_CONTROL + offset);
+ val &= ~HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val &= ~HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH_MASK;
+
+ switch (bpc) {
+ case 0:
+ case 6:
+ case 8:
+ case 16:
+ default:
+ DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+ connector->name, bpc);
+ break;
+ case 10:
+ val |= HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val |= 1 << HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH__SHIFT;
+ DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+ connector->name);
+ break;
+ case 12:
+ val |= HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val |= 2 << HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH__SHIFT;
+ DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+ connector->name);
+ break;
+ }
+
+ WREG32(mmHDMI_CONTROL + offset, val);
+
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + offset,
+ HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK | /* send null packets when required */
+ HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK | /* send general control packets */
+ HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK); /* send general control packets every frame */
+
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + offset,
+ HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND_MASK | /* enable audio info frames (frames won't be set until audio is enabled) */
+ HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_CONT_MASK); /* required for audio info values to be updated */
+
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + offset,
+ AFMT_INFOFRAME_CONTROL0__AFMT_AUDIO_INFO_UPDATE_MASK); /* required for audio info values to be updated */
+
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + offset,
+ (2 << HDMI_INFOFRAME_CONTROL1__HDMI_AUDIO_INFO_LINE__SHIFT)); /* anything other than 0 */
+
+ WREG32(mmHDMI_GC + offset, 0); /* unset HDMI_GC_AVMUTE */
+
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + offset,
+ (1 << HDMI_AUDIO_PACKET_CONTROL__HDMI_AUDIO_DELAY_EN__SHIFT) | /* set the default audio delay */
+ (3 << HDMI_AUDIO_PACKET_CONTROL__HDMI_AUDIO_PACKETS_PER_LINE__SHIFT)); /* should be suffient for all audio modes and small enough for all hblanks */
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + offset,
+ AFMT_AUDIO_PACKET_CONTROL__AFMT_60958_CS_UPDATE_MASK); /* allow 60958 channel status fields to be updated */
+
+ /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */
+
+ if (bpc > 8)
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + offset,
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_AUTO_SEND_MASK); /* allow hw to sent ACR packets when required */
+ else
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + offset,
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_SOURCE_MASK | /* select SW CTS value */
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_AUTO_SEND_MASK); /* allow hw to sent ACR packets when required */
+
+ dce_v8_0_afmt_update_ACR(encoder, mode->clock);
+
+ WREG32(mmAFMT_60958_0 + offset,
+ (1 << AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L__SHIFT));
+
+ WREG32(mmAFMT_60958_1 + offset,
+ (2 << AFMT_60958_1__AFMT_60958_CS_CHANNEL_NUMBER_R__SHIFT));
+
+ WREG32(mmAFMT_60958_2 + offset,
+ (3 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_2__SHIFT) |
+ (4 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_3__SHIFT) |
+ (5 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_4__SHIFT) |
+ (6 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_5__SHIFT) |
+ (7 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_6__SHIFT) |
+ (8 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_7__SHIFT));
+
+ dce_v8_0_audio_write_speaker_allocation(encoder);
+
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + offset,
+ (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
+
+ dce_v8_0_afmt_audio_select_pin(encoder);
+ dce_v8_0_audio_write_sad_regs(encoder);
+ dce_v8_0_audio_write_latency_fields(encoder, mode);
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ dce_v8_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+ WREG32_OR(mmHDMI_INFOFRAME_CONTROL0 + offset,
+ HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK | /* enable AVI info frames */
+ HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_CONT_MASK); /* required for audio info values to be updated */
+
+ WREG32_P(mmHDMI_INFOFRAME_CONTROL1 + offset,
+ (2 << HDMI_INFOFRAME_CONTROL1__HDMI_AVI_INFO_LINE__SHIFT), /* anything other than 0 */
+ ~HDMI_INFOFRAME_CONTROL1__HDMI_AVI_INFO_LINE_MASK);
+
+ WREG32_OR(mmAFMT_AUDIO_PACKET_CONTROL + offset,
+ AFMT_AUDIO_PACKET_CONTROL__AFMT_AUDIO_SAMPLE_SEND_MASK); /* send audio packets */
+
+ WREG32(mmAFMT_RAMP_CONTROL0 + offset, 0x00FFFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL1 + offset, 0x007FFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL2 + offset, 0x00000001);
+ WREG32(mmAFMT_RAMP_CONTROL3 + offset, 0x00000001);
+
+ /* enable audio after setting up hw */
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE8 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] = {
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v8_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ if (enable)
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
+ else
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
+}
+
+static void dce_v8_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (enable)
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
+ else
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels;
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+ u32 pipe_config;
+ u32 viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
+ fb_format |= (DISPLAY_MICRO_TILING << GRPH_CONTROL__GRPH_MICRO_TILE_MODE__SHIFT);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ }
+
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
+
+ dce_v8_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ WREG32_P(mmGRPH_LUT_10BIT_BYPASS_CONTROL + amdgpu_crtc->crtc_offset,
+ (bypass_lut ? LUT_10BIT_BYPASS_EN : 0),
+ ~LUT_10BIT_BYPASS_EN);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v8_0_grph_enable(crtc, true);
+
+ WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v8_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v8_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset,
+ LB_DATA_FORMAT__INTERLEAVE_EN__SHIFT);
+ else
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
+}
+
+static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
+ /* XXX this only needs to be programmed once per crtc at startup,
+ * not sure where the best place for it is
+ */
+ WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset,
+ ALPHA_CONTROL__CURSOR_ALPHA_BLND_ENA_MASK);
+}
+
+static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return 1;
+ else
+ return 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return 3;
+ else
+ return 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return 5;
+ else
+ return 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v8_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ * Asic specific PLL information
+ *
+ * DCE 8.x
+ * KB/KV
+ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
+ * CI
+ * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
+ *
+ */
+static u32 dce_v8_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else {
+ /* use the same PPLL for all DP monitors */
+ pll = amdgpu_pll_get_shared_dp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ /* otherwise, pick one of the plls */
+ if ((adev->asic_type == CHIP_KABINI) ||
+ (adev->asic_type == CHIP_MULLINS)) {
+ /* KB/ML has PPLL1 and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ } else {
+ /* CI/KV has PPLL0, PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ }
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v8_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock |= CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ else
+ cur_lock &= ~CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v8_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static void dce_v8_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursor are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v8_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v8_0_lock_cursor(crtc, true);
+ ret = dce_v8_0_cursor_move_locked(crtc, x, y);
+ dce_v8_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v8_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v8_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v8_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v8_0_show_cursor(crtc);
+ dce_v8_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v8_0_lock_cursor(crtc, true);
+
+ dce_v8_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v8_0_show_cursor(crtc);
+
+ dce_v8_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v8_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
+ .cursor_set2 = dce_v8_0_crtc_cursor_set2,
+ .cursor_move = dce_v8_0_crtc_cursor_move,
+ .gamma_set = dce_v8_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v8_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ dce_v8_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ dce_v8_0_vga_enable(crtc, false);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v8_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled) {
+ dce_v8_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ dce_v8_0_vga_enable(crtc, false);
+ }
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v8_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v8_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v8_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll don't turn
+ * off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ case ATOM_PPLL0:
+ /* disable the ppll */
+ if ((adev->asic_type == CHIP_KAVERI) ||
+ (adev->asic_type == CHIP_BONAIRE) ||
+ (adev->asic_type == CHIP_HAWAII))
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v8_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v8_0_cursor_reset(crtc);
+ /* update the hw version fpr dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v8_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v8_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v8_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
+ .dpms = dce_v8_0_crtc_dpms,
+ .mode_fixup = dce_v8_0_crtc_mode_fixup,
+ .mode_set = dce_v8_0_crtc_mode_set,
+ .mode_set_base = dce_v8_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v8_0_crtc_set_base_atomic,
+ .prepare = dce_v8_0_crtc_prepare,
+ .commit = dce_v8_0_crtc_commit,
+ .disable = dce_v8_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v8_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
+ amdgpu_crtc->max_cursor_height = CIK_CURSOR_HEIGHT;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id];
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v8_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
+
+ dce_v8_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v8_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6;
+ break;
+ case CHIP_KAVERI:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 7;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6; /* ? */
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ dce_v8_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v8_0_sw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 8; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v8_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v8_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v8_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int dce_v8_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ kfree(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v8_0_audio_fini(adev);
+
+ dce_v8_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v8_0_hw_init(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* disable vga render */
+ dce_v8_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v8_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v8_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v8_0_hw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dce_v8_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v8_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v8_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v8_0_hw_fini(handle);
+}
+
+static int dce_v8_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v8_0_hw_init(handle);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v8_0_is_idle(void *handle)
+{
+ return true;
+}
+
+static int dce_v8_0_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int dce_v8_0_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (dce_v8_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v8_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask &= ~LB_INTERRUPT_MASK__VBLANK_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask |= LB_INTERRUPT_MASK__VBLANK_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask &= ~LB_INTERRUPT_MASK__VLINE_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask |= LB_INTERRUPT_MASK__VLINE_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 dc_hpd_int_cntl;
+
+ if (type >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hdp %d\n", type);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* IRQ could occur when in initial stage */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wakeup usersapce */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask, tmp;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+
+}
+
+static int dce_v8_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v8_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
+ .name = "dce_v8_0",
+ .early_init = dce_v8_0_early_init,
+ .late_init = NULL,
+ .sw_init = dce_v8_0_sw_init,
+ .sw_fini = dce_v8_0_sw_fini,
+ .hw_init = dce_v8_0_hw_init,
+ .hw_fini = dce_v8_0_hw_fini,
+ .suspend = dce_v8_0_suspend,
+ .resume = dce_v8_0_resume,
+ .is_idle = dce_v8_0_is_idle,
+ .wait_for_idle = dce_v8_0_wait_for_idle,
+ .soft_reset = dce_v8_0_soft_reset,
+ .set_clockgating_state = dce_v8_0_set_clockgating_state,
+ .set_powergating_state = dce_v8_0_set_powergating_state,
+};
+
+static void
+dce_v8_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* set scaler clears this on some chips */
+ dce_v8_0_set_interleave(encoder->crtc, mode);
+
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+ dce_v8_0_afmt_enable(encoder, true);
+ dce_v8_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v8_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v8_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v8_0_program_fmt(encoder);
+}
+
+static void dce_v8_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v8_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+ dce_v8_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v8_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v8_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v8_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v8_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v8_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static const struct drm_encoder_helper_funcs dce_v8_0_ext_helper_funcs = {
+ .dpms = dce_v8_0_ext_dpms,
+ .prepare = dce_v8_0_ext_prepare,
+ .mode_set = dce_v8_0_ext_mode_set,
+ .commit = dce_v8_0_ext_commit,
+ .disable = dce_v8_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v8_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v8_0_encoder_prepare,
+ .mode_set = dce_v8_0_encoder_mode_set,
+ .commit = dce_v8_0_encoder_commit,
+ .disable = dce_v8_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v8_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v8_0_encoder_prepare,
+ .mode_set = dce_v8_0_encoder_mode_set,
+ .commit = dce_v8_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v8_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v8_0_encoder_funcs = {
+ .destroy = dce_v8_0_encoder_destroy,
+};
+
+static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v8_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v8_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v8_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
+ .bandwidth_update = &dce_v8_0_bandwidth_update,
+ .vblank_get_counter = &dce_v8_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v8_0_hpd_sense,
+ .hpd_set_polarity = &dce_v8_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v8_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v8_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v8_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v8_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v8_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
+ .set = dce_v8_0_set_crtc_interrupt_state,
+ .process = dce_v8_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
+ .set = dce_v8_0_set_pageflip_interrupt_state,
+ .process = dce_v8_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
+ .set = dce_v8_0_set_hpd_interrupt_state,
+ .process = dce_v8_0_hpd_irq,
+};
+
+static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v8_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v8_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v8_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 3,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
new file mode 100644
index 000000000..13b802dd9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V8_0_H__
+#define __DCE_V8_0_H__
+
+extern const struct amdgpu_ip_block_version dce_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_1_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_2_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_3_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_5_ip_block;
+
+void dce_v8_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 000000000..5dfab80ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_sw_init(struct amdgpu_device *adev)
+{
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+}
+
+static void df_v1_7_sw_fini(struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+ tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+
+ return df_v1_7_channel_number[fb_channel_number];
+}
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+ ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+ .sw_init = df_v1_7_sw_init,
+ .sw_fini = df_v1_7_sw_fini,
+ .enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+ .get_fb_channel_number = df_v1_7_get_fb_channel_number,
+ .get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+ .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v1_7_get_clockgating_state,
+ .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
new file mode 100644
index 000000000..74621104c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
+
+#include "soc15_common.h"
+enum DF_V1_7_MGCG
+{
+ DF_V1_7_MGCG_DISABLE = 0,
+ DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
+ DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
+ DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
+ DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
+ DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
+};
+
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 000000000..483a441b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+#define DF_3_6_SMN_REG_INST_DIST 0x8
+#define DF_3_6_INST_CNT 8
+
+/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
+#define DF_V3_6_MAX_COUNTERS 4
+
+/* get flags from df perfmon config */
+#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL)
+#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
+#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
+#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+ 16, 32, 0, 0, 0, 2, 4, 8};
+
+static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
+ uint32_t ficaa_val)
+{
+ unsigned long flags, address, data;
+ uint32_t ficadl_val, ficadh_val;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ ficadl_val = RREG32(data);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ ficadh_val = RREG32(data);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
+}
+
+static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ WREG32(data, ficadl_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ WREG32(data, ficadh_val);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/*
+ * df_v3_6_perfmon_rreg - read perfmon lo and hi
+ *
+ * required to be atomic. no mmio method provided so subsequent reads for lo
+ * and hi require to preserve df finite state machine
+ */
+static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t *lo_val,
+ uint32_t hi_addr, uint32_t *hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ *lo_val = RREG32(data);
+ WREG32(address, hi_addr);
+ *hi_val = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/*
+ * df_v3_6_perfmon_wreg - write to perfmon lo and hi
+ *
+ * required to be atomic. no mmio method provided so subsequent reads after
+ * data writes cannot occur to preserve data fabrics finite state machine.
+ */
+static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
+ uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/* same as perfmon_wreg but return status on write value check */
+static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+ uint32_t lo_val_rb, hi_val_rb;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+
+ WREG32(address, lo_addr);
+ lo_val_rb = RREG32(data);
+ WREG32(address, hi_addr);
+ hi_val_rb = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
+ return -EBUSY;
+
+ return 0;
+}
+
+
+/*
+ * retry arming counters every 100 usecs within 1 millisecond interval.
+ * if retry fails after time out, return error.
+ */
+#define ARM_RETRY_USEC_TIMEOUT 1000
+#define ARM_RETRY_USEC_INTERVAL 100
+static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ int countdown = ARM_RETRY_USEC_TIMEOUT;
+
+ while (countdown) {
+
+ if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
+ hi_addr, hi_val))
+ break;
+
+ countdown -= ARM_RETRY_USEC_INTERVAL;
+ udelay(ARM_RETRY_USEC_INTERVAL);
+ }
+
+ return countdown > 0 ? 0 : -ETIME;
+}
+
+/* get the number of df counters available */
+static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev;
+ struct drm_device *ddev;
+ int i, count;
+
+ ddev = dev_get_drvdata(dev);
+ adev = drm_to_adev(ddev);
+ count = 0;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0)
+ count++;
+ }
+
+ return sysfs_emit(buf, "%i\n", count);
+}
+
+/* device attr for available perfmon counters */
+static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
+
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+
+ /* encoding for hash-enabled on Arcturus and Aldebaran */
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+ (adev->asic_type == CHIP_ALDEBARAN &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+ adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl64K);
+ adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl2M);
+ adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl1G);
+ }
+}
+
+/* init perfmons */
+static void df_v3_6_sw_init(struct amdgpu_device *adev)
+{
+ int i, ret;
+
+ ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (ret)
+ DRM_ERROR("failed to create file for available df counters\n");
+
+ for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
+ adev->df_perfmon_config_assign_mask[i] = 0;
+
+ df_v3_6_query_hashes(adev);
+}
+
+static void df_v3_6_sw_fini(struct amdgpu_device *adev)
+{
+
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
+ tmp &=
+ ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ }
+ tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
+ fb_channel_number = 0;
+
+ return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+ }
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+/* get assigned df perfmon ctr as int */
+static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+
+ return ((config & 0x0FFFFFFUL) ==
+ adev->df_perfmon_config_assign_mask[counter_idx]);
+
+}
+
+/* get address based on counter assignment */
+static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ int is_ctrl,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ return;
+
+ switch (counter_idx) {
+
+ case 0:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
+ break;
+ case 1:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
+ break;
+ case 2:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
+ break;
+ case 3:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
+ break;
+
+ }
+
+}
+
+/* get read counter address */
+static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr,
+ hi_base_addr);
+}
+
+/* get control counter settings i.e. address and values to set */
+static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr,
+ uint32_t *lo_val,
+ uint32_t *hi_val,
+ bool is_enable)
+{
+
+ uint32_t eventsel, instance, unitmask;
+ uint32_t instance_10, instance_5432, instance_76;
+
+ df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr,
+ hi_base_addr);
+
+ if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+ DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
+ *lo_base_addr, *hi_base_addr);
+ return -ENXIO;
+ }
+
+ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
+ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
+ instance = DF_V3_6_GET_INSTANCE(config);
+
+ instance_10 = instance & 0x3;
+ instance_5432 = (instance >> 2) & 0xf;
+ instance_76 = (instance >> 6) & 0x3;
+
+ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
+ *lo_val = is_enable ? *lo_val | (1 << 22) : *lo_val & ~(1 << 22);
+ *hi_val = (instance_76 << 29) | instance_5432;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
+
+ return 0;
+}
+
+/* add df performance counters for read */
+static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int i;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0U) {
+ adev->df_perfmon_config_assign_mask[i] =
+ config & 0x0FFFFFFUL;
+ return i;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+#define DEFERRED_ARM_MASK (1 << 31)
+static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
+ uint64_t config, int counter_idx,
+ bool is_deferred)
+{
+
+ if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ return -EINVAL;
+
+ if (is_deferred)
+ adev->df_perfmon_config_assign_mask[counter_idx] |=
+ DEFERRED_ARM_MASK;
+ else
+ adev->df_perfmon_config_assign_mask[counter_idx] &=
+ ~DEFERRED_ARM_MASK;
+
+ return 0;
+}
+
+static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
+ (adev->df_perfmon_config_assign_mask[counter_idx]
+ & DEFERRED_ARM_MASK));
+
+}
+
+/* release performance counter */
+static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ if (df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;
+}
+
+
+static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ uint32_t lo_base_addr = 0, hi_base_addr = 0;
+
+ df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+}
+
+/* return available counter if is_add == 1 otherwise return error status. */
+static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_add)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int err = 0, ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ if (is_add)
+ return df_v3_6_pmc_add_cntr(adev, config);
+
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ counter_idx,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val,
+ true);
+
+ if (ret)
+ return ret;
+
+ err = df_v3_6_perfmon_arm_with_retry(adev,
+ lo_base_addr,
+ lo_val,
+ hi_base_addr,
+ hi_val);
+
+ if (err)
+ ret = df_v3_6_pmc_set_deferred(adev, config,
+ counter_idx, true);
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_remove)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ counter_idx,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val,
+ false);
+
+ if (ret)
+ return ret;
+
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (is_remove) {
+ df_v3_6_reset_perfmon_cntr(adev, config, counter_idx);
+ df_v3_6_pmc_release_cntr(adev, config, counter_idx);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint64_t *count)
+{
+ uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
+ *count = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
+ &lo_base_addr, &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ /* rearm the counter or throw away count value on failure */
+ if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {
+ int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
+ lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (rearm_err)
+ return;
+
+ df_v3_6_pmc_set_deferred(adev, config, counter_idx,
+ false);
+ }
+
+ df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
+ hi_base_addr, &hi_val);
+
+ *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+
+ if (*count >= DF_V3_6_PERFMON_OVERFLOW)
+ *count = 0;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+
+ break;
+ default:
+ break;
+ }
+}
+
+static bool df_v3_6_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ mmDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ mmDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+ .sw_init = df_v3_6_sw_init,
+ .sw_fini = df_v3_6_sw_fini,
+ .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+ .get_fb_channel_number = df_v3_6_get_fb_channel_number,
+ .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+ .update_medium_grain_clock_gating =
+ df_v3_6_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v3_6_get_clockgating_state,
+ .pmc_start = df_v3_6_pmc_start,
+ .pmc_stop = df_v3_6_pmc_stop,
+ .pmc_get_count = df_v3_6_pmc_get_count,
+ .get_fica = df_v3_6_get_fica,
+ .set_fica = df_v3_6_set_fica,
+ .query_ras_poison_mode = df_v3_6_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 000000000..2505c7ef2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
+
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+ DF_V3_6_MGCG_DISABLE = 0,
+ DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+ DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+ DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+ DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+ DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct attribute_group *df_v3_6_attr_groups[];
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.c b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
new file mode 100644
index 000000000..e8b9e19ed
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_3.h"
+
+#include "df/df_4_3_offset.h"
+#include "df/df_4_3_sh_mask.h"
+
+static bool df_v4_3_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ regDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ regDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_3_funcs = {
+ .query_ras_poison_mode = df_v4_3_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.h b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
new file mode 100644
index 000000000..06ef0724e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_3_H__
+#define __DF_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
new file mode 100644
index 000000000..e9f177e9e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "dimgrey_cavefish_ip_offset.h"
+
+int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialize the block needed by driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/emu_soc.c b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
new file mode 100644
index 000000000..d72c25c1b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+
+int emu_soc_asic_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
new file mode 100644
index 000000000..c2b9dfc64
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -0,0 +1,9547 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "nv.h"
+#include "nvd.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
+
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "clearstate_gfx10.h"
+#include "v10_structs.h"
+#include "gfx_v10_0.h"
+#include "nbio_v2_3.h"
+
+/*
+ * Navi10 has two graphic rings to share each graphic pipe.
+ * 1. Primary ring
+ * 2. Async ring
+ */
+#define GFX10_NUM_GFX_RINGS_NV1X 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
+#define GFX10_MEC_HPD_SIZE 2048
+
+#define F32_CE_PROGRAM_RAM_SIZE 65536
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
+#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA0_CLK_CTRL 0x507a
+#define mmCGTT_SPI_RA0_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA1_CLK_CTRL 0x507b
+#define mmCGTT_SPI_RA1_CLK_CTRL_BASE_IDX 1
+
+#define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8
+#define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L
+
+#define mmCGTS_TCC_DISABLE_gc_10_3 0x5006
+#define mmCGTS_TCC_DISABLE_gc_10_3_BASE_IDX 1
+#define mmCGTS_USER_TCC_DISABLE_gc_10_3 0x5007
+#define mmCGTS_USER_TCC_DISABLE_gc_10_3_BASE_IDX 1
+
+#define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55
+#define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid_BASE_IDX 1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid 0x11ec
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid 0x0fc1
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid 0x0fc2
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid 0x0fc3
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid 0x0fc4
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid 0x0fc5
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid 0x0fc6
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid_BASE_IDX 0
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid__SHIFT 0x1a
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid_MASK 0x04000000L
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid_MASK 0x00000FFCL
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid__SHIFT 0x2
+#define CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK 0x00000FFCL
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1
+
+#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441
+#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1
+#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261
+#define mmVGT_TF_MEMORY_BASE_HI_Vangogh_BASE_IDX 1
+#define mmVGT_HS_OFFCHIP_PARAM_Vangogh 0x224f
+#define mmVGT_HS_OFFCHIP_PARAM_Vangogh_BASE_IDX 1
+#define mmVGT_TF_RING_SIZE_Vangogh 0x224e
+#define mmVGT_TF_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmVGT_GSVS_RING_SIZE_Vangogh 0x2241
+#define mmVGT_GSVS_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmVGT_TF_MEMORY_BASE_Vangogh 0x2250
+#define mmVGT_TF_MEMORY_BASE_Vangogh_BASE_IDX 1
+#define mmVGT_ESGS_RING_SIZE_Vangogh 0x2240
+#define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmSPI_CONFIG_CNTL_Vangogh 0x2440
+#define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX 1
+#define mmGCR_GENERAL_CNTL_Vangogh 0x1580
+#define mmGCR_GENERAL_CNTL_Vangogh_BASE_IDX 0
+#define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh 0x0000FFFFL
+
+#define mmCP_HYP_PFP_UCODE_ADDR 0x5814
+#define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_PFP_UCODE_DATA 0x5815
+#define mmCP_HYP_PFP_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_ADDR 0x5818
+#define mmCP_HYP_CE_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_DATA 0x5819
+#define mmCP_HYP_CE_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_ADDR 0x5816
+#define mmCP_HYP_ME_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_DATA 0x5817
+#define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1
+
+#define mmCPG_PSP_DEBUG 0x5c10
+#define mmCPG_PSP_DEBUG_BASE_IDX 1
+#define mmCPC_PSP_DEBUG 0x5c11
+#define mmCPC_PSP_DEBUG_BASE_IDX 1
+#define CPC_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L
+#define CPG_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L
+
+//CC_GC_SA_UNIT_DISABLE
+#define mmCC_GC_SA_UNIT_DISABLE 0x0fe9
+#define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX 0
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT 0x8
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK 0x0000FF00L
+//GC_USER_SA_UNIT_DISABLE
+#define mmGC_USER_SA_UNIT_DISABLE 0x0fea
+#define mmGC_USER_SA_UNIT_DISABLE_BASE_IDX 0
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT 0x8
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK 0x0000FF00L
+//PA_SC_ENHANCE_3
+#define mmPA_SC_ENHANCE_3 0x1085
+#define mmPA_SC_ENHANCE_3_BASE_IDX 0
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO__SHIFT 0x3
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO_MASK 0x00000008L
+
+#define mmCGTT_SPI_CS_CLK_CTRL 0x507c
+#define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX 1
+
+#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid 0x16f3
+#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0
+#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db
+#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1
+
+MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi10_me.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi12_me.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vangogh_ce.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_me.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_me.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+
+static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x33),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1ac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1bc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1cc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x26),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x25),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x3b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x22),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x35),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_START_PHASE, 0x000000ff, 0x00000004),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
+
+ /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on Navy Flounder. */
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
+
+ /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. */
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x00001d00, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0x40000000, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00040000, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0x01000000, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x0000001f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xb0000ff0, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff000000, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+/* TODO: pending on golden setting value of gb address config */
+#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx10_kiq_set_resources,
+ .kiq_map_queues = gfx10_kiq_map_queues,
+ .kiq_unmap_queues = gfx10_kiq_unmap_queues,
+ .kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
+}
+
+static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
+ break;
+ case IP_VERSION(10, 1, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
+ break;
+ case IP_VERSION(10, 1, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
+ break;
+ case IP_VERSION(10, 1, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
+ break;
+ case IP_VERSION(10, 1, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
+ break;
+ case IP_VERSION(10, 3, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_sienna_cichlid,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
+ break;
+ case IP_VERSION(10, 3, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_2,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2));
+ break;
+ case IP_VERSION(10, 3, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_vangogh,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh));
+ break;
+ case IP_VERSION(10, 3, 3):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_3));
+ break;
+ case IP_VERSION(10, 3, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_4,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ break;
+ case IP_VERSION(10, 3, 5):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_5,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5));
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_cyan_skillfish,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish));
+ break;
+ case IP_VERSION(10, 3, 6):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_6,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6));
+ break;
+ case IP_VERSION(10, 3, 7):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_7,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7));
+ break;
+ default:
+ break;
+ }
+ gfx_v10_0_init_spm_golden_registers(adev);
+}
+
+static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned int i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned int index;
+ uint64_t gpu_addr;
+ volatile uint32_t *cpu_ptr;
+ long r;
+
+ memset(&ib, 0, sizeof(ib));
+
+ if (ring->is_mes_queue) {
+ uint32_t padding, offset;
+
+ offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+ padding = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+
+ ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
+ *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ if (!ring->is_mes_queue)
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!adev->gfx.cp_fw_write_wait)
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+}
+
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+ bool ret = false;
+
+ switch (adev->pdev->revision) {
+ case 0xc2:
+ case 0xc3:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret;
+}
+
+static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
+{
+ char fw_name[40];
+ char ucode_prefix[30];
+ const char *wks = "";
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ wks = "_wks";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ if (!err) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
+
+ gfx_v10_0_check_fw_write_wait(adev);
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ }
+
+ gfx_v10_0_check_gfxoff_flag(adev);
+
+ return err;
+}
+
+static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
+ break;
+ default:
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ break;
+ }
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx10_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static void gfx_v10_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ const __le32 *fw_data = NULL;
+ unsigned int fw_size;
+ u32 *fw = NULL;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx10 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here
+ */
+ WARN_ON(simd != 0);
+
+ /* type 2 wave data */
+ dst[(*no_fields)++] = 2;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ nv_grbm_select(adev, me, pipe, q, vm);
+}
+
+static void gfx_v10_0_update_perfmon_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ data = def = RREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL);
+
+ if (enable)
+ data |= RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK;
+ else
+ data &= ~RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK;
+
+ if (data != def)
+ WREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL, data);
+}
+
+static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v10_0_select_se_sh,
+ .read_wave_data = &gfx_v10_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
+ .init_spm_golden = &gfx_v10_0_init_spm_golden_registers,
+ .update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg,
+};
+
+static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+}
+
+static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ unsigned int irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX10_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v10_0_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id = 0;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ /* KIQ event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
+ &adev->gfx.kiq[0].irq);
+ if (r)
+ return r;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v10_0_me_init(adev);
+
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
+ }
+
+ r = gfx_v10_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v10_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v10_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
+
+ gfx_v10_0_gpu_early_init(adev);
+
+ return 0;
+}
+
+static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+}
+
+static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+}
+
+static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+}
+
+static int gfx_v10_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ gfx_v10_0_pfp_fini(adev);
+ gfx_v10_0_ce_fini(adev);
+ gfx_v10_0_me_fini(adev);
+ gfx_v10_0_rlc_fini(adev);
+ gfx_v10_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
+
+ gfx_v10_0_free_microcode(adev);
+
+ return 0;
+}
+
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 bitmap;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) &&
+ ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+ continue;
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v10_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+}
+
+static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
+{
+ uint32_t num_sc;
+ uint32_t enabled_rb_per_sh;
+ uint32_t active_rb_bitmap;
+ uint32_t num_rb_per_sc;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
+
+ /* for ASICs that integrates GFX v10.3
+ * pa_sc_tile_steering_override should be set to 0
+ */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ return 0;
+
+ /* init num_sc */
+ num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.num_sc_per_sh;
+ /* init num_rb_per_sc */
+ active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
+ enabled_rb_per_sh = hweight32(active_rb_bitmap);
+ num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
+ /* init num_packer_per_sc */
+ num_packer_per_sc = adev->gfx.config.num_packer_per_sc;
+
+ pa_sc_tile_steering_override = 0;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;
+
+ return pa_sc_tile_steering_override;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
+static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+ }
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
+}
+
+static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+
+static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ int i, j, k;
+ int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
+ u32 tmp, wgp_active_bitmap = 0;
+ u32 gcrd_targets_disable_tcp = 0;
+ u32 utcl_invreq_disable = 0;
+ /*
+ * GCRD_TARGETS_DISABLE field contains
+ * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+ * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
+ */
+ u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ max_wgp_per_sh + /* SQC */
+ 4); /* GL1C */
+ /*
+ * UTCL1_UTCL0_INVREQ_DISABLE field contains
+ * for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+ * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
+ */
+ u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ 2 * max_wgp_per_sh + /* SQC */
+ 4 + /* RMI */
+ 1); /* SQG */
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ /*
+ * Set corresponding TCP bits for the inactive WGPs in
+ * GCRD_SA_TARGETS_DISABLE
+ */
+ gcrd_targets_disable_tcp = 0;
+ /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+ utcl_invreq_disable = 0;
+
+ for (k = 0; k < max_wgp_per_sh; k++) {
+ if (!(wgp_active_bitmap & (1 << k))) {
+ gcrd_targets_disable_tcp |= 3 << (2 * k);
+ gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
+ utcl_invreq_disable |= (3 << (2 * k)) |
+ (3 << (2 * (max_wgp_per_sh + k)));
+ }
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (4 * max_wgp_per_sh));
+ tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+ WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (3 * max_wgp_per_sh));
+ tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+ WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+ }
+ }
+
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable;
+
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
+ tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
+ RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
+ } else {
+ tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);
+ }
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
+static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v10_0_setup_rb(adev);
+ gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v10_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override =
+ gfx_v10_0_init_pa_sc_tile_steering_override(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v10_0_init_compute_vmid(adev);
+ gfx_v10_0_init_gds_vmid(adev);
+
+}
+
+static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+
+ WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+}
+
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ /* csib */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+ }
+ return 0;
+}
+
+static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
+}
+
+static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= 0x800000;
+ } else
+ rlc_pg_cntl &= ~0x800000;
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected
+ */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32_SOC15(GC, 0, mmRLC_SRM_CNTL);
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_SRM_CNTL, tmp);
+}
+
+static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->psp.autoload_supported) {
+
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+
+ gfx_v10_0_init_csb(adev);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v10_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v10_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ /* rlc backdoor autoload firmware */
+ r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v10_0_init_csb(adev);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
+ adev->gfx.rlc.funcs->start(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static struct {
+ FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[FIRMWARE_ID_MAX];
+
+static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
+{
+ int ret;
+ RLC_TABLE_OF_CONTENT *rlc_toc;
+
+ ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
+ return ret;
+ }
+
+ /* Copy toc from psp sos fw to rlc toc buffer */
+ memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc.start_addr, adev->psp.toc.size_bytes);
+
+ rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
+ while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
+ (rlc_toc->id < FIRMWARE_ID_MAX)) {
+ if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
+ (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
+ /* Offset needs 4KB alignment */
+ rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
+ }
+
+ rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
+ rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
+ rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
+
+ rlc_toc++;
+ }
+
+ return 0;
+}
+
+static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ FIRMWARE_ID id;
+ int ret;
+
+ ret = gfx_v10_0_parse_rlc_toc(adev);
+ if (ret) {
+ dev_err(adev->dev, "failed to parse rlc toc\n");
+ return 0;
+ }
+
+ for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offset in rlc toc ucode is aligned */
+ if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[FIRMWARE_ID_MAX-1].size;
+
+ return total_size;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v10_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+
+ data = adev->gfx.rlc.rlc_toc_buf;
+ size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size);
+
+ /* ce ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_CE,
+ fw_data, fw_size);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_ME,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ /* mec1 ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ cp_hdr->jt_size * 4;
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size);
+ /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */
+}
+
+/* Temporarily put sdma part here */
+static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v1_0 *sdma_hdr;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
+ adev->sdma.instance[i].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);
+
+ if (i == 0) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ } else if (i == 1) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ }
+ }
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size, tmp;
+ uint64_t gpu_addr;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
+ if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
+ RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
+ DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
+ DRM_ERROR("RLC ROM should halt itself\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program me ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program ce ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program pfp ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program mec1 ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
+ bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
+ else
+ WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+
+ if (adev->job_hang && !enable)
+ return 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v10_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r);
+ gfx_v10_0_ce_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, 0);
+
+ for (i = 0; i < ce_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_DATA,
+ le32_to_cpup(fw_data + ce_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v10_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_ce_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load ce fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
+
+ gfx_v10_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit cs packet to copy state 0 to next available state */
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+ }
+ return 0;
+}
+
+static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ if (!amdgpu_async_gfx_ring) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
+ }
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK);
+ break;
+ default:
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+ break;
+ }
+}
+
+static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Init gfx ring 1 for pipe 1 */
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v10_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ break;
+ }
+ } else {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ }
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned int i;
+ u32 tmp;
+ u32 usec_timeout = 50000; /* Wait for 50 ms */
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr &
+ 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ /*
+ * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
+ * different microcode than MEC1.
+ */
+
+ return 0;
+}
+
+static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ break;
+ default:
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ break;
+ }
+}
+
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = 0;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
+
+ /* set up time quantum */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+
+ /*
+ * if there are 2 gfx rings, set the lower doorbell
+ * range of the first ring, otherwise the range of
+ * the second ring will override the first ring
+ */
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v10_0_gfx_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v10_0_cp_gfx_start(adev);
+}
+
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* disable doorbells */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v10_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v10_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v10_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v10_0_kcq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev, 0);
+done:
+ return r;
+}
+
+static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v10_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v10_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v10_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v10_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v10_0_cp_gfx_enable(adev, enable);
+ gfx_v10_0_cp_compute_enable(adev, enable);
+}
+
+static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data, pattern = 0xDEADBEEF;
+
+ /*
+ * check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE
+ */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ }
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ return true;
+ default:
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ }
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
+ break;
+ }
+
+ return false;
+}
+
+static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * Initialize cam_index to 0
+ * index will auto-inc after each data writing
+ */
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ default:
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+}
+
+static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
+}
+
+static int gfx_v10_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_emu_mode)
+ gfx_v10_0_init_golden_registers(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /**
+ * For gfx 10, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+ gfx_v10_0_disable_gpa_mode(adev);
+ }
+
+ /* if GRBM CAM not remapped, set up the remapping */
+ if (!gfx_v10_0_check_grbm_cam_remapping(adev))
+ gfx_v10_0_setup_grbm_cam_remapping(adev);
+
+ gfx_v10_0_constants_init(adev);
+
+ r = gfx_v10_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ gfx_v10_0_tcp_harvest(adev);
+
+ r = gfx_v10_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ gfx_v10_3_program_pbb_mode(adev);
+
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ gfx_v10_3_set_power_brake_sequence(adev);
+
+ return r;
+}
+
+static int gfx_v10_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v10_0_cp_gfx_enable(adev, false);
+ /* Remove the steps of clearing KIQ position.
+ * It causes GFX hang when another Win guest is rendering.
+ */
+ return 0;
+ }
+ gfx_v10_0_cp_enable(adev, false);
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+
+ return 0;
+}
+
+static int gfx_v10_0_suspend(void *handle)
+{
+ return gfx_v10_0_hw_fini(handle);
+}
+
+static int gfx_v10_0_resume(void *handle)
+{
+ return gfx_v10_0_hw_init(handle);
+}
+
+static bool gfx_v10_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v10_0_wait_for_idle(void *handle)
+{
+ unsigned int i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v10_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
+ GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
+ GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX,
+ 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ default:
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ }
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ gfx_v10_0_rlc_stop(adev);
+
+ /* Disable GFX parsing/prefetching */
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock, clock_lo, clock_hi, hi_check;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ case IP_VERSION(10, 3, 6):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ default:
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ }
+ return clock;
+}
+
+static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
+ break;
+ default:
+ break;
+ }
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+
+ gfx_v10_0_set_kiq_pm4_funcs(adev);
+ gfx_v10_0_set_ring_funcs(adev);
+ gfx_v10_0_set_irq_funcs(adev);
+ gfx_v10_0_set_gds_init(adev);
+ gfx_v10_0_set_rlc_funcs(adev);
+ gfx_v10_0_set_mqd_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v10_0_init_microcode(adev);
+}
+
+static int gfx_v10_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned int i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
+ }
+}
+
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+ break;
+ }
+}
+
+static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 0 - Disable some blocks' MGCG */
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_IA_CLK_CTRL, 0xff000000);
+
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ }
+}
+
+static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ /* Enable 3D CGCG/CGLS */
+ if (enable) {
+ /* write cmd to clear cgcg/cgls ov */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable 3Dcgcg FSM(0x0000363f) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ /* Disable CGCG/CGLS */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
+ /* disable cgcg, cgls should be disabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
+ /* reset CGCG/CGLS bits */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+ }
+}
+
+static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset FGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ /* update FGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL);
+ /* unset RLC SRAM CLK GATER override */
+ data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+ /* update RLC SRAM CLK GATER override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* reset FGCG bits */
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ /* disable FGCG*/
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL);
+ /* reset RLC SRAM CLK GATER bits */
+ data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+ /* disable RLC SRAM CLK*/
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data);
+ }
+}
+
+static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t reg_idx = 0;
+ uint32_t i;
+
+ const uint32_t tcp_ctrl_regs[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
+ };
+
+ const uint32_t tcp_ctrl_regs_nv12[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ };
+
+ const uint32_t sm_ctlr_regs[] = {
+ mmCGTS_SA0_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA0_QUAD1_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD1_SM_CTRL_REG
+ };
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs_nv12[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
+ sm_ctlr_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
+ reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
+ WREG32(reg_idx, reg_data);
+ }
+}
+
+static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (enable) {
+ /* enable FGCG firstly*/
+ gfx_v10_0_update_fine_grain_clock_gating(adev, enable);
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === CGCG + CGLS === */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+
+ if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)))
+ gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === MGCG + MGLS === */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* disable fgcg at last*/
+ gfx_v10_0_update_fine_grain_clock_gating(adev, enable);
+ }
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
+{
+ u32 reg, data;
+
+ /* not for *_SOC15 */
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+}
+
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, vmid);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);
+}
+
+static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, data);
+
+ /*
+ * CGPG enablement required and the register to program the hysteresis value
+ * RLC_PG_DELAY_3.CGCG_ACTIVE_BEFORE_CGPG to the desired CGPG hysteresis value
+ * in refclk count. Note that RLC FW is modified to take 16 bits from
+ * RLC_PG_DELAY_3[15:0] as the hysteresis instead of just 8 bits.
+ *
+ * The recommendation from RLC team is setting RLC_PG_DELAY_3 to 200us as part)
+ * of CGPG enablement starting point.
+ * Power/performance team will optimize it and might give a new value later.
+ */
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
+ WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v10_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
+};
+
+static int gfx_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_cntl_pg(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ gfx_v10_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx10 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
+ uint64_t wptr_tmp;
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always being used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
+ } else {
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+ }
+}
+
+static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx10 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
+ uint64_t wptr_tmp;
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
+ } else {
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx10 now */
+ }
+ }
+}
+
+static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
+ gfx_v10_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+ }
+
+ if (ring->is_mes_queue)
+ /* inherit vmid from mqd */
+ control |= 0x400000;
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ if (ring->is_mes_queue)
+ /* inherit vmid from mqd */
+ control |= 0x40000000;
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, ring->is_mes_queue ?
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+}
+
+static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ if (ring->is_mes_queue)
+ gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
+ else
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ if (ring->adev->gfx.mcbp)
+ gfx_v10_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned int ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+
+ return ret;
+}
+
+static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
+{
+ unsigned int cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_ce_ib_state ce_payload = {0};
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
+ int cnt;
+
+ cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ce_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gds_backup) +
+ offsetof(struct v10_gfx_meta_data, de_payload);
+ gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+ }
+
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
+static void
+gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ uint32_t mes_queue_id = entry->src_data[0];
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we only enabled 1 gfx queue per pipe for now */
+ if (ring->me == me_id && ring->pipe == pipe_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ }
+}
+
+static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ if (ring->me == 1)
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ else
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+ DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
+ me_id, pipe_id, queue_id);
+
+ amdgpu_fence_process(ring);
+ return 0;
+}
+
+static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
+ .name = "gfx_v10_0",
+ .early_init = gfx_v10_0_early_init,
+ .late_init = gfx_v10_0_late_init,
+ .sw_init = gfx_v10_0_sw_init,
+ .sw_fini = gfx_v10_0_sw_fini,
+ .hw_init = gfx_v10_0_hw_init,
+ .hw_fini = gfx_v10_0_hw_fini,
+ .suspend = gfx_v10_0_suspend,
+ .resume = gfx_v10_0_resume,
+ .is_idle = gfx_v10_0_is_idle,
+ .wait_for_idle = gfx_v10_0_wait_for_idle,
+ .soft_reset = gfx_v10_0_soft_reset,
+ .set_clockgating_state = gfx_v10_0_set_clockgating_state,
+ .set_powergating_state = gfx_v10_0_set_powergating_state,
+ .get_clockgating_state = gfx_v10_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v10_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v10_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place
+ * just prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 8, /* gfx_v10_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
+ .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
+ .preempt_ib = gfx_v10_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v10_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v10_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+ 8, /* gfx_v10_0_emit_mem_sync */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v10_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v10_0_ring_emit_rreg,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
+ .set = gfx_v10_0_set_eop_interrupt_state,
+ .process = gfx_v10_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
+ .set = gfx_v10_0_set_priv_reg_fault_state,
+ .process = gfx_v10_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
+ .set = gfx_v10_0_set_priv_inst_fault_state,
+ .process = gfx_v10_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
+ .set = gfx_v10_0_kiq_set_interrupt_state,
+ .process = gfx_v10_0_kiq_irq,
+};
+
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
+
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
+ break;
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
+{
+ unsigned int total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+
+ adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v10_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v10_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v10_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v10_0_compute_mqd_init;
+}
+
+static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 disabled_mask =
+ ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ u32 efuse_setting = 0;
+ u32 vbios_setting = 0;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ disabled_mask |= efuse_setting | vbios_setting;
+
+ return (~disabled_mask);
+}
+
+static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned int disable_masks[4 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) ||
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) &&
+ ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 4 && j < 2)
+ gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
+ cu_info->bitmap[0][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev)
+{
+ uint32_t efuse_setting, vbios_setting, disabled_sa, max_sa_mask;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
+ efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE);
+ vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ max_sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+ disabled_sa = efuse_setting | vbios_setting;
+ disabled_sa &= max_sa_mask;
+
+ return disabled_sa;
+}
+
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev)
+{
+ uint32_t max_sa_per_se, max_sa_per_se_mask, max_shader_engines;
+ uint32_t disabled_sa_mask, se_index, disabled_sa_per_se;
+
+ disabled_sa_mask = gfx_v10_3_get_disabled_sa(adev);
+
+ max_sa_per_se = adev->gfx.config.max_sh_per_se;
+ max_sa_per_se_mask = (1 << max_sa_per_se) - 1;
+ max_shader_engines = adev->gfx.config.max_shader_engines;
+
+ for (se_index = 0; max_shader_engines > se_index; se_index++) {
+ disabled_sa_per_se = disabled_sa_mask >> (se_index * max_sa_per_se);
+ disabled_sa_per_se &= max_sa_per_se_mask;
+ if (disabled_sa_per_se == max_sa_per_se_mask) {
+ WREG32_FIELD15(GC, 0, PA_SC_ENHANCE_3, FORCE_PBB_WORKLOAD_MODE_TO_ZERO, 1);
+ break;
+ }
+ }
+}
+
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) |
+ (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) |
+ (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL);
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA,
+ (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) |
+ (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) |
+ (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) |
+ (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid,
+ (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) |
+ (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) |
+ (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL);
+
+ WREG32_SOC15(GC, 0, mmDIDT_IND_DATA,
+ (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
+}
+
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
new file mode 100644
index 000000000..b442e5032
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 dvanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V10_0_H__
+#define __GFX_V10_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v10_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
new file mode 100644
index 000000000..d0c3ec9f4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -0,0 +1,6409 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "amdgpu_atomfirmware.h"
+#include "imu_v11_0.h"
+#include "soc21.h"
+#include "nvd.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15.h"
+#include "soc15d.h"
+#include "clearstate_gfx11.h"
+#include "v11_structs.h"
+#include "gfx_v11_0.h"
+#include "gfx_v11_0_3.h"
+#include "nbio_v4_3.h"
+#include "mes_v11_0.h"
+
+#define GFX11_NUM_GFX_RINGS 1
+#define GFX11_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
+
+#define regCGTT_WD_CLK_CTRL 0x5086
+#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx11_kiq_set_resources,
+ .kiq_map_queues = gfx11_kiq_map_queues,
+ .kiq_unmap_queues = gfx11_kiq_unmap_queues,
+ .kiq_query_status = gfx11_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
+}
+
+static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
+ break;
+ default:
+ break;
+ }
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
+}
+
+static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ uint64_t gpu_addr;
+ volatile uint32_t *cpu_ptr;
+ long r;
+
+ /* MES KIQ fw hasn't indirect buffer support for now */
+ if (adev->enable_mes_kiq &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ return 0;
+
+ memset(&ib, 0, sizeof(ib));
+
+ if (ring->is_mes_queue) {
+ uint32_t padding, offset;
+
+ offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+ padding = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+
+ ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
+ *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ if (!ring->is_mes_queue)
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+ char fw_name[40];
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 1505) &&
+ (adev->gfx.pfp_fw_version >= 1600) &&
+ (adev->gfx.mec_fw_version >= 512)) {
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.cp_gfx_shadow = true;
+ else
+ adev->gfx.cp_gfx_shadow = false;
+ }
+ break;
+ default:
+ adev->gfx.cp_gfx_shadow = false;
+ break;
+ }
+}
+
+static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+ char fw_name[40];
+ char ucode_prefix[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err)
+ goto out;
+ /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
+ adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+ (union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data, 2, 0);
+ if (adev->gfx.rs64_enable) {
+ dev_info(adev->dev, "CP RS64 enable\n");
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
+ /* only one MEC for gfx 11.0.0. */
+ adev->gfx.mec2_fw = NULL;
+
+ gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ }
+
+ return err;
+}
+
+static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx11_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
+ return 0;
+}
+
+static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 3 wave data */
+ dst[(*no_fields)++] = 3;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc21_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ if (adev->gfx.cp_gfx_shadow) {
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ return 0;
+ } else {
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -ENOTSUPP;
+ }
+}
+
+static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v11_0_select_se_sh,
+ .read_wave_data = &gfx_v11_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
+};
+
+static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
+{
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.ras = &gfx_v11_0_3_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ int r;
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX11_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static struct {
+ SOC21_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
+
+static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+ RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
+
+ while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
+ (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
+ rlc_autoload_info[ucode->id].id = ucode->id;
+ rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
+ rlc_autoload_info[ucode->id].size = ucode->size * 4;
+
+ ucode++;
+ }
+}
+
+static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ SOC21_FIRMWARE_ID id;
+
+ gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offset in rlc toc ucode is aligned */
+ if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
+
+ return total_size;
+}
+
+static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v11_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ SOC21_FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size,
+ uint32_t *fw_autoload_mask)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+
+ if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
+ *(uint64_t *)fw_autoload_mask |= 1ULL << id;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ void *data;
+ uint32_t size;
+ uint64_t *toc_ptr;
+
+ *(uint64_t *)fw_autoload_mask |= 0x1;
+
+ DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
+
+ toc_ptr = (uint64_t *)data + size / 8 - 1;
+ *toc_ptr = *(uint64_t *)fw_autoload_mask;
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
+ data, size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ if (adev->gfx.rs64_enable) {
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ } else {
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* mec ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ cp_hdr->jt_size * 4;
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 2) {
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+ }
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ unsigned fw_size;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ int pipe, ucode_id, data_id;
+
+ for (pipe = 0; pipe < 2; pipe++) {
+ if (pipe==0) {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
+ } else {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ ucode_id, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ data_id, fw_data, fw_size, fw_autoload_mask);
+ }
+}
+
+static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t autoload_fw_id[2];
+
+ memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
+
+ rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v11_0_disable_gpa_mode(adev);
+
+ return 0;
+}
+
+static int gfx_v11_0_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id = 0;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfxhub.funcs->init(adev);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* FED error */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
+ &adev->gfx.rlc_gc_fed_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ if (adev->gfx.imu.funcs) {
+ if (adev->gfx.imu.funcs->init_microcode) {
+ r = adev->gfx.imu.funcs->init_microcode(adev);
+ if (r)
+ DRM_ERROR("Failed to load imu firmware!\n");
+ }
+ }
+
+ gfx_v11_0_me_init(adev);
+
+ r = gfx_v11_0_rlc_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v11_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v11_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static int gfx_v11_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ gfx_v11_0_pfp_fini(adev);
+ gfx_v11_0_me_fini(adev);
+ gfx_v11_0_rlc_fini(adev);
+ gfx_v11_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v11_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v11_0_free_microcode(adev);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+ gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+ gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+ CC_GC_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
+ gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+ GC_USER_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
+
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+ u32 rb_mask;
+
+ gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+ gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+ CC_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+ GC_USER_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
+}
+
+static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
+{
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap |= global_active_rb_bitmap;
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
+static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v11_0_setup_rb(adev);
+ gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v11_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* Set whether texture coordinate truncation is conformant. */
+ tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
+ adev->gfx.config.ta_cntl2_truncate_coord_mode =
+ REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v11_0_init_compute_vmid(adev);
+ gfx_v11_0_init_gds_vmid(adev);
+}
+
+static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
+}
+
+static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ } else
+ rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
+}
+
+static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ version_major = le16_to_cpu(hdr->header.header_version_major);
+ version_minor = le16_to_cpu(hdr->header.header_version_minor);
+
+ if (version_major == 2) {
+ gfx_v11_0_load_rlcg_microcode(adev);
+ if (amdgpu_dpm == 1) {
+ if (version_minor >= 2)
+ gfx_v11_0_load_rlc_iram_dram_microcode(adev);
+ if (version_minor == 3)
+ gfx_v11_0_load_rlcp_rlcv_microcode(adev);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v11_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v11_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v11_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v11_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /* Program me ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /* Program pfp ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ /* Program mec1 ucode address into intruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(addr2));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc21_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+ uint64_t addr, addr2;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
+ bootload_status = RREG32_SOC15(GC, 0,
+ regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
+ else
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.rs64_enable) {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
+ r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
+ r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
+ r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ } else {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
+ r = gfx_v11_0_config_me_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
+ r = gfx_v11_0_config_pfp_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
+ r = gfx_v11_0_config_mec_cache(adev, addr);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Waiting for cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align*/
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2) );
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi>>2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe0 reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_gfx_enable(adev, false);
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit cs packet to copy state 0 to next available state */
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+ }
+ return 0;
+}
+
+static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Init gfx ring 1 for pipe 1 */
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v11_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 data;
+
+ if (adev->gfx.rs64_enable) {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
+ enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+
+ if (enable) {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
+ if (!adev->enable_mes_kiq)
+ data = REG_SET_FIELD(data, CP_MEC_CNTL,
+ MEC_ME2_HALT, 0);
+ } else {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
+ }
+
+ udelay(50);
+}
+
+static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 *fw = NULL;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 tmp, fw_ucode_size, fw_data_size;
+ u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+ memcpy(fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+}
+
+static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+ /* set graphics engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.gfx_ring0 * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
+
+ /* set compute engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+}
+
+static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+
+ /* set up time quantum */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v11_0_gfx_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v11_0_cp_gfx_start(adev);
+}
+
+static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v11_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v11_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ ring->sched.ready = true;
+ return 0;
+}
+
+static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v11_0_kcq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev, 0);
+done:
+ return r;
+}
+
+static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v11_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) {
+ gfx_v11_0_cp_compute_enable(adev, true);
+ gfx_v11_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v11_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v11_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v11_0_cp_gfx_enable(adev, enable);
+ gfx_v11_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
+ false : true;
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* select RS64 */
+ if (adev->gfx.rs64_enable) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
+ }
+
+ if (amdgpu_emu_mode == 1)
+ msleep(100);
+}
+
+static int get_gb_addr_config(struct amdgpu_device * adev)
+{
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static int gfx_v11_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ }
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v11_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ adev->gfx.is_poweron = true;
+
+ if(get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->gfx.rs64_enable)
+ gfx_v11_0_config_gfx_rs64(adev);
+
+ r = gfx_v11_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_emu_mode)
+ gfx_v11_0_init_golden_registers(adev);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ /**
+ * For gfx 11, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ gfx_v11_0_constants_init(adev);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ gfx_v11_0_select_cp_fw_arch(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v11_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v11_0_tcp_harvest(adev);
+
+ r = gfx_v11_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v11_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ /* Remove the steps disabling CPG and clearing KIQ position,
+ * so that CP could perform IDLE-SAVE during switch. Those
+ * steps are necessary to avoid a DMAR error in gfx9 but it is
+ * not reproduced on gfx11.
+ */
+ return 0;
+
+ gfx_v11_0_cp_enable(adev, false);
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v11_0_suspend(void *handle)
+{
+ return gfx_v11_0_hw_fini(handle);
+}
+
+static int gfx_v11_0_resume(void *handle)
+{
+ return gfx_v11_0_hw_init(handle);
+}
+
+static bool gfx_v11_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v11_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v11_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ int i, j, k;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.me.num_me; ++i) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
+ }
+ }
+ }
+
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
+
+ // Read CP_VMID_RESET register three times.
+ // to get sufficient time for GFX_HQD_ACTIVE reach 0
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
+ !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait all pipes clean\n");
+ return -EINVAL;
+ }
+
+ /********** trigger soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+ /********** exit soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 0);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
+ WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait CP_VMID_RESET to 0\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ return gfx_v11_0_cp_resume(adev);
+}
+
+static bool gfx_v11_0_check_soft_reset(void *handle)
+{
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+static int gfx_v11_0_post_soft_reset(void *handle)
+{
+ /**
+ * GFX soft reset will impact MES, need resume MES when do GFX soft reset
+ */
+ return amdgpu_mes_resume((struct amdgpu_device *)handle);
+}
+
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+ uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ } else {
+ preempt_disable();
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+ }
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v11_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
+
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+
+ gfx_v11_0_set_kiq_pm4_funcs(adev);
+ gfx_v11_0_set_ring_funcs(adev);
+ gfx_v11_0_set_irq_funcs(adev);
+ gfx_v11_0_set_gds_init(adev);
+ gfx_v11_0_set_rlc_funcs(adev);
+ gfx_v11_0_set_mqd_funcs(adev);
+ gfx_v11_0_set_imu_funcs(adev);
+
+ gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v11_0_init_microcode(adev);
+}
+
+static int gfx_v11_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ }
+}
+
+static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v11_0_update_sram_fgcg(adev, enable);
+
+ gfx_v11_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 reg, data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v11_0_set_safe_mode,
+ .unset_safe_mode = gfx_v11_0_unset_safe_mode,
+ .init = gfx_v11_0_rlc_init,
+ .get_csb_size = gfx_v11_0_get_csb_size,
+ .get_csb_buffer = gfx_v11_0_get_csb_buffer,
+ .resume = gfx_v11_0_rlc_resume,
+ .stop = gfx_v11_0_rlc_stop,
+ .reset = gfx_v11_0_rlc_reset,
+ .start = gfx_v11_0_rlc_start,
+ .update_spm_vmid = gfx_v11_0_update_spm_vmid,
+};
+
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ // Program RLC_PG_DELAY3 for CGPG hysteresis
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static int gfx_v11_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v11_cntl_pg(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ gfx_v11_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_REPEATER_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_PERF_CLK */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx11 is 32bit rptr*/
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
+ uint64_t wptr_tmp;
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always being used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
+ } else {
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+ }
+}
+
+static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx11 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
+ uint64_t wptr_tmp;
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume doorbell always used by mes mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
+ } else {
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx11 now */
+ }
+ }
+}
+
+static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (vmid)
+ gfx_v11_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+ }
+
+ if (ring->is_mes_queue)
+ /* inherit vmid from mqd */
+ control |= 0x400000;
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ if (ring->is_mes_queue)
+ /* inherit vmid from mqd */
+ control |= 0x40000000;
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GL2_INV |
+ PACKET3_RELEASE_MEM_GCR_GL2_US |
+ PACKET3_RELEASE_MEM_GCR_GL1_INV |
+ PACKET3_RELEASE_MEM_GCR_GLV_INV |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV |
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, ring->is_mes_queue ?
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+}
+
+static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ if (ring->is_mes_queue)
+ gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
+ else
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+ u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow,
+ int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (!adev->gfx.cp_gfx_shadow)
+ return;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+ amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+ amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+ amdgpu_ring_write(ring, lower_32_bits(gds_va));
+ amdgpu_ring_write(ring, upper_32_bits(gds_va));
+ amdgpu_ring_write(ring, lower_32_bits(csa_va));
+ amdgpu_ring_write(ring, upper_32_bits(csa_va));
+ amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+ amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+}
+
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+
+ return ret;
+}
+
+static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gds_backup) +
+ offsetof(struct v10_gfx_meta_data, de_payload);
+ gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+ }
+
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32_SOC15(GC, 0, regSQ_CMD, value);
+}
+
+static void
+gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ uint32_t mes_queue_id = entry->src_data[0];
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we only enabled 1 gfx queue per pipe for now */
+ if (ring->me == me_id && ring->pipe == pipe_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+}
+
+static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
+ return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
+
+ return 0;
+}
+
+#if 0
+static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+#endif
+
+static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
+ .name = "gfx_v11_0",
+ .early_init = gfx_v11_0_early_init,
+ .late_init = gfx_v11_0_late_init,
+ .sw_init = gfx_v11_0_sw_init,
+ .sw_fini = gfx_v11_0_sw_fini,
+ .hw_init = gfx_v11_0_hw_init,
+ .hw_fini = gfx_v11_0_hw_fini,
+ .suspend = gfx_v11_0_suspend,
+ .resume = gfx_v11_0_resume,
+ .is_idle = gfx_v11_0_is_idle,
+ .wait_for_idle = gfx_v11_0_wait_for_idle,
+ .soft_reset = gfx_v11_0_soft_reset,
+ .check_soft_reset = gfx_v11_0_check_soft_reset,
+ .post_soft_reset = gfx_v11_0_post_soft_reset,
+ .set_clockgating_state = gfx_v11_0_set_clockgating_state,
+ .set_powergating_state = gfx_v11_0_set_powergating_state,
+ .get_clockgating_state = gfx_v11_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 9 + /* SET_Q_PREEMPTION_MODE */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 8, /* gfx_v11_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+ .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
+ .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
+ .preempt_ib = gfx_v11_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v11_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v11_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
+ 8, /* gfx_v11_0_emit_mem_sync */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v11_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v11_0_ring_emit_rreg,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
+ .set = gfx_v11_0_set_eop_interrupt_state,
+ .process = gfx_v11_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
+ .set = gfx_v11_0_set_priv_reg_fault_state,
+ .process = gfx_v11_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
+ .set = gfx_v11_0_set_priv_inst_fault_state,
+ .process = gfx_v11_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
+ .process = gfx_v11_0_rlc_gc_fed_irq,
+};
+
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
+
+ adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
+ adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
+
+}
+
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
+}
+
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
+}
+
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
+{
+ unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+
+ adev->gds.gds_size = 0x1000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v11_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v11_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v11_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v11_0_compute_mqd_init;
+}
+
+static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
+
+ /**
+ * GFX11 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
new file mode 100644
index 000000000..10cfc29c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 dvanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_H__
+#define __GFX_V11_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
new file mode 100644
index 000000000..26d6286d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "soc21.h"
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v11_0.h"
+
+
+static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t rlc_status0 = 0, rlc_status1 = 0;
+ struct ras_common_if *ras_if = NULL;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0));
+ rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1));
+
+ if (!rlc_status0 && !rlc_status1) {
+ dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n");
+ return 0;
+ }
+
+ /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
+ ras_if = adev->sdma.ras_if;
+ else
+ ras_if = adev->gfx.ras_if;
+
+ if (!ras_if) {
+ dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n",
+ rlc_status0);
+ return -EINVAL;
+ }
+
+ dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ /* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
+ if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
+ (entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
+ !entry->vmid && !entry->pasid) {
+ uint32_t rlc_status0 = 0;
+
+ rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return 0;
+}
+
+struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
+ .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
+ .poison_consumption_handler = gfx_v11_0_3_poison_consumption_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
new file mode 100644
index 000000000..672c7920b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_3_H__
+#define __GFX_V11_0_3_H__
+
+extern struct amdgpu_gfx_ras gfx_v11_0_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
new file mode 100644
index 000000000..34f9211b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -0,0 +1,3611 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_ucode.h"
+#include "clearstate_si.h"
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+#include "si_enums.h"
+#include "si.h"
+
+static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_me.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_ce.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_me.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/verde_pfp.bin");
+MODULE_FIRMWARE("amdgpu/verde_me.bin");
+MODULE_FIRMWARE("amdgpu/verde_ce.bin");
+MODULE_FIRMWARE("amdgpu/verde_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/oland_pfp.bin");
+MODULE_FIRMWARE("amdgpu/oland_me.bin");
+MODULE_FIRMWARE("amdgpu/oland_ce.bin");
+MODULE_FIRMWARE("amdgpu/oland_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/hainan_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hainan_me.bin");
+MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
+MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
+
+static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
+static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
+
+#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
+#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
+#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
+#define MICRO_TILE_MODE(x) ((x) << 0)
+#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
+#define BANK_WIDTH(x) ((x) << 14)
+#define BANK_HEIGHT(x) ((x) << 16)
+#define MACRO_TILE_ASPECT(x) ((x) << 18)
+#define NUM_BANKS(x) ((x) << 20)
+
+static const u32 verde_rlc_save_restore_register_list[] =
+{
+ (0x8000 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x98f0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xe7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9150 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x897c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8d8c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac54 >> 2),
+ 0X00000000,
+ 0x3,
+ (0x9c00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9910 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9914 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9918 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x991c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9920 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9924 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9928 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x992c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9930 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9934 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9938 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x993c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9940 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9944 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9948 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x994c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9950 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9954 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9958 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x995c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9960 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9964 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9968 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x996c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9970 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9974 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9978 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x997c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9980 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9984 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9988 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x998c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c08 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9060 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9364 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x913c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e50 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e58 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e5c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9508 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x950c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9494 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88cc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x89b0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9830 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9838 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9a10 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ 0x00000000
+};
+
+static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v1_0 *rlc_hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_name = "tahiti";
+ break;
+ case CHIP_PITCAIRN:
+ chip_name = "pitcairn";
+ break;
+ case CHIP_VERDE:
+ chip_name = "verde";
+ break;
+ case CHIP_OLAND:
+ chip_name = "oland";
+ break;
+ case CHIP_HAINAN:
+ chip_name = "hainan";
+ break;
+ default: BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+
+out:
+ if (err) {
+ pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ }
+ return err;
+}
+
+static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ u32 reg_offset, split_equal_to_row_size, *tilemode;
+
+ memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array));
+ tilemode = adev->gfx.config.tile_mode_array;
+
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
+ break;
+ case 2:
+ default:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
+ break;
+ case 4:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
+ break;
+ }
+
+ if (adev->asic_type == CHIP_VERDE) {
+ tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+ tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+ } else if (adev->asic_type == CHIP_OLAND) {
+ tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(split_equal_to_row_size) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(split_equal_to_row_size) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(split_equal_to_row_size) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(split_equal_to_row_size) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+ tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+ tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ NUM_BANKS(ADDR_SURF_8_BANK) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+ } else if (adev->asic_type == CHIP_HAINAN) {
+ tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2);
+ tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+ tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2);
+ tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2);
+ tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2);
+ tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+ } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) {
+ tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+ tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+ tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+ tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+ tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
+ tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+ } else {
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ }
+}
+
+static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+ if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
+ data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+ else if (se_num == 0xffffffff)
+ data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
+ (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
+ else if (sh_num == 0xffffffff)
+ data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
+ else
+ data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
+ (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
+ WREG32(mmGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_RB_BACKEND_DISABLE) |
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+
+ data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/
+ adev->gfx.config.max_sh_per_se);
+
+ return ~data & mask;
+}
+
+static void gfx_v6_0_raster_config(struct amdgpu_device *adev, u32 *rconf)
+{
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ *rconf |=
+ (2 << PA_SC_RASTER_CONFIG__RB_XSEL2__SHIFT) |
+ (1 << PA_SC_RASTER_CONFIG__RB_XSEL__SHIFT) |
+ (2 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT) |
+ (1 << PA_SC_RASTER_CONFIG__PKR_YSEL__SHIFT) |
+ (2 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT) |
+ (2 << PA_SC_RASTER_CONFIG__SE_XSEL__SHIFT) |
+ (2 << PA_SC_RASTER_CONFIG__SE_YSEL__SHIFT);
+ break;
+ case CHIP_VERDE:
+ *rconf |=
+ (1 << PA_SC_RASTER_CONFIG__RB_XSEL__SHIFT) |
+ (2 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT) |
+ (1 << PA_SC_RASTER_CONFIG__PKR_YSEL__SHIFT);
+ break;
+ case CHIP_OLAND:
+ *rconf |= (1 << PA_SC_RASTER_CONFIG__RB_YSEL__SHIFT);
+ break;
+ case CHIP_HAINAN:
+ *rconf |= 0x0;
+ break;
+ default:
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ break;
+ }
+}
+
+static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
+ u32 raster_config, unsigned rb_mask,
+ unsigned num_rb)
+{
+ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
+ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
+ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
+ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
+ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config;
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK;
+
+ if (!se_mask[idx])
+ raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
+ else
+ raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK;
+
+ if (!pkr0_mask)
+ raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
+ else
+ raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK;
+
+ if (!rb0_mask)
+ raster_config_se |=
+ RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
+ else
+ raster_config_se |=
+ RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK;
+
+ if (!rb0_mask)
+ raster_config_se |=
+ RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
+ else
+ raster_config_se |=
+ RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
+ }
+ }
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on SI */
+ gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on SI */
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+}
+
+static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 raster_config = 0;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ unsigned num_rb_pipes;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v6_0_get_rb_active_bitmap(adev);
+ active_rbs |= data <<
+ ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+
+ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines, 16);
+
+ gfx_v6_0_raster_config(adev, &raster_config);
+
+ if (!adev->gfx.config.backend_enable_mask ||
+ adev->gfx.config.num_rbs >= num_rb_pipes)
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+ else
+ gfx_v6_0_write_harvested_raster_configs(adev, raster_config,
+ adev->gfx.config.backend_enable_mask,
+ num_rb_pipes);
+
+ /* cache the values for userspace */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ adev->gfx.config.rb_config[i][j].rb_backend_disable =
+ RREG32(mmCC_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].raster_config =
+ RREG32(mmPA_SC_RASTER_CONFIG);
+ }
+ }
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
+ RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+ return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
+}
+
+
+static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
+{
+ int i, j, k;
+ u32 data, mask;
+ u32 active_cu = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
+ active_cu = gfx_v6_0_get_cu_enabled(adev);
+
+ mask = 1;
+ for (k = 0; k < 16; k++) {
+ mask <<= k;
+ if (active_cu & mask) {
+ data &= ~mask;
+ WREG32(mmSPI_STATIC_THREAD_MGMT_3, data);
+ break;
+ }
+ }
+ }
+ }
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v6_0_config_init(struct amdgpu_device *adev)
+{
+ adev->gfx.config.double_offchip_lds_buf = 0;
+}
+
+static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config = 0;
+ u32 mc_arb_ramcfg;
+ u32 sx_debug_1;
+ u32 hdp_host_path_cntl;
+ u32 tmp;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ adev->gfx.config.max_shader_engines = 2;
+ adev->gfx.config.max_tile_pipes = 12;
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_sh_per_se = 2;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 12;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_PITCAIRN:
+ adev->gfx.config.max_shader_engines = 2;
+ adev->gfx.config.max_tile_pipes = 8;
+ adev->gfx.config.max_cu_per_sh = 5;
+ adev->gfx.config.max_sh_per_se = 2;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 8;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_VERDE:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 5;
+ adev->gfx.config.max_sh_per_se = 2;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_OLAND:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 6;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_HAINAN:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 5;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 1;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x40;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
+ WREG32(mmSRBM_INT_CNTL, 1);
+ WREG32(mmSRBM_INT_ACK, 1);
+
+ WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+
+ adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+ mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+ adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
+ adev->gfx.config.mem_max_burst_length_bytes = 256;
+ tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
+ adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+ if (adev->gfx.config.mem_row_size_in_kb > 4)
+ adev->gfx.config.mem_row_size_in_kb = 4;
+ adev->gfx.config.shader_engine_tile_size = 32;
+ adev->gfx.config.num_gpus = 1;
+ adev->gfx.config.multi_gpu_tile_size = 64;
+
+ gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ default:
+ gb_addr_config |= 0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT;
+ break;
+ case 2:
+ gb_addr_config |= 1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT;
+ break;
+ case 4:
+ gb_addr_config |= 2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT;
+ break;
+ }
+ gb_addr_config &= ~GB_ADDR_CONFIG__NUM_SHADER_ENGINES_MASK;
+ if (adev->gfx.config.max_shader_engines == 2)
+ gb_addr_config |= 1 << GB_ADDR_CONFIG__NUM_SHADER_ENGINES__SHIFT;
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmDMIF_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
+ WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmDMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+ WREG32(mmDMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
+
+#if 0
+ if (adev->has_uvd) {
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
+ }
+#endif
+ gfx_v6_0_tiling_mode_table_init(adev);
+
+ gfx_v6_0_setup_rb(adev);
+
+ gfx_v6_0_setup_spi(adev);
+
+ gfx_v6_0_get_cu_info(adev);
+ gfx_v6_0_config_init(adev);
+
+ WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
+ (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
+ WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+
+ sx_debug_1 = RREG32(mmSX_DEBUG_1);
+ WREG32(mmSX_DEBUG_1, sx_debug_1);
+
+ WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
+
+ WREG32(mmPA_SC_FIFO_SIZE, ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
+
+ WREG32(mmVGT_NUM_INSTANCES, 1);
+ WREG32(mmCP_PERFMON_CNTL, 0);
+ WREG32(mmSQ_CONFIG, 0);
+ WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS, ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
+ (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
+
+ WREG32(mmVGT_CACHE_INVALIDATION,
+ (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
+ (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
+
+ WREG32(mmVGT_GS_VERTEX_REUSE, 16);
+ WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
+
+ WREG32(mmCB_PERFCOUNTER0_SELECT0, 0);
+ WREG32(mmCB_PERFCOUNTER0_SELECT1, 0);
+ WREG32(mmCB_PERFCOUNTER1_SELECT0, 0);
+ WREG32(mmCB_PERFCOUNTER1_SELECT1, 0);
+ WREG32(mmCB_PERFCOUNTER2_SELECT0, 0);
+ WREG32(mmCB_PERFCOUNTER2_SELECT1, 0);
+ WREG32(mmCB_PERFCOUNTER3_SELECT0, 0);
+ WREG32(mmCB_PERFCOUNTER3_SELECT1, 0);
+
+ hdp_host_path_cntl = RREG32(mmHDP_HOST_PATH_CNTL);
+ WREG32(mmHDP_HOST_PATH_CNTL, hdp_host_path_cntl);
+
+ WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
+ (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
+
+ udelay(50);
+}
+
+static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSCRATCH_REG0);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+ EVENT_INDEX(0));
+}
+
+static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ /* flush read cache over gart */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, (mmCP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 10); /* poll interval */
+ /* EVENT_WRITE_EOP - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ ((write64bit ? 2 : 1) << CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT) |
+ ((int_sel ? 2 : 0) << CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+}
+
+static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ (ib->gpu_addr & 0xFFFFFFFC));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+ amdgpu_ring_write(ring, control);
+}
+
+/**
+ * gfx_v6_0_ring_test_ib - basic ring IB test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Allocate an IB and execute it on the gfx ring (SI).
+ * Provides a basic gfx ring test to verify that IBs are working.
+ * Returns 0 on success, error on failure.
+ */
+static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ struct amdgpu_ib ib;
+ uint32_t tmp = 0;
+ long r;
+
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ return r;
+
+ ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_CONFIG_REG_START;
+ ib.ptr[2] = 0xDEADBEEF;
+ ib.length_dw = 3;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ }
+ tmp = RREG32(mmSCRATCH_REG0);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+error:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+ return r;
+}
+
+static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable) {
+ WREG32(mmCP_ME_CNTL, 0);
+ } else {
+ WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
+ CP_ME_CNTL__PFP_HALT_MASK |
+ CP_ME_CNTL__CE_HALT_MASK));
+ WREG32(mmSCRATCH_UMSK, 0);
+ }
+ udelay(50);
+}
+
+static int gfx_v6_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ unsigned i;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ gfx_v6_0_cp_gfx_enable(adev, false);
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* PFP */
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_PFP_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_PFP_UCODE_ADDR, 0);
+
+ /* CE */
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_CE_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_CE_UCODE_ADDR, 0);
+
+ /* ME */
+ fw_data = (const __be32 *)
+ (adev->gfx.me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_ME_RAM_WADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_ME_RAM_WADDR, 0);
+
+ WREG32(mmCP_PFP_UCODE_ADDR, 0);
+ WREG32(mmCP_CE_UCODE_ADDR, 0);
+ WREG32(mmCP_ME_RAM_WADDR, 0);
+ WREG32(mmCP_ME_RAM_RADDR, 0);
+ return 0;
+}
+
+static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ int r, i;
+
+ r = amdgpu_ring_alloc(ring, 7 + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+ amdgpu_ring_write(ring, 0x1);
+ amdgpu_ring_write(ring, 0x0);
+ amdgpu_ring_write(ring, adev->gfx.config.max_hw_contexts - 1);
+ amdgpu_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0xc000);
+ amdgpu_ring_write(ring, 0xe000);
+ amdgpu_ring_commit(ring);
+
+ gfx_v6_0_cp_gfx_enable(adev, true);
+
+ r = amdgpu_ring_alloc(ring, gfx_v6_0_get_csb_size(adev) + 10);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ amdgpu_ring_write(ring, 0x00000316);
+ amdgpu_ring_write(ring, 0x0000000e);
+ amdgpu_ring_write(ring, 0x00000010);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ int r;
+ u64 rptr_addr;
+
+ WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
+ WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
+
+ /* Set the write pointer delay */
+ WREG32(mmCP_RB_WPTR_DELAY, 0);
+
+ WREG32(mmCP_DEBUG, 0);
+ WREG32(mmSCRATCH_ADDR, 0);
+
+ /* ring 0 - compute and gfx */
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+
+#ifdef __BIG_ENDIAN
+ tmp |= BUF_SWAP_32BIT;
+#endif
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
+ ring->wptr = 0;
+ WREG32(mmCP_RB0_WPTR, ring->wptr);
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+
+ WREG32(mmSCRATCH_UMSK, 0);
+
+ mdelay(1);
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ WREG32(mmCP_RB0_BASE, ring->gpu_addr >> 8);
+
+ /* start the rings */
+ gfx_v6_0_cp_gfx_start(adev);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->gfx.gfx_ring[0])
+ return RREG32(mmCP_RB0_WPTR);
+ else if (ring == &adev->gfx.compute_ring[0])
+ return RREG32(mmCP_RB1_WPTR);
+ else if (ring == &adev->gfx.compute_ring[1])
+ return RREG32(mmCP_RB2_WPTR);
+ else
+ BUG();
+}
+
+static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ (void)RREG32(mmCP_RB0_WPTR);
+}
+
+static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->gfx.compute_ring[0]) {
+ WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ (void)RREG32(mmCP_RB1_WPTR);
+ } else if (ring == &adev->gfx.compute_ring[1]) {
+ WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr));
+ (void)RREG32(mmCP_RB2_WPTR);
+ } else {
+ BUG();
+ }
+
+}
+
+static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ int i, r;
+ u64 rptr_addr;
+
+ /* ring1 - compute only */
+ /* Set ring buffer size */
+
+ ring = &adev->gfx.compute_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+ tmp |= BUF_SWAP_32BIT;
+#endif
+ WREG32(mmCP_RB1_CNTL, tmp);
+
+ WREG32(mmCP_RB1_CNTL, tmp | CP_RB1_CNTL__RB_RPTR_WR_ENA_MASK);
+ ring->wptr = 0;
+ WREG32(mmCP_RB1_WPTR, ring->wptr);
+
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32(mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32(mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+
+ mdelay(1);
+ WREG32(mmCP_RB1_CNTL, tmp);
+ WREG32(mmCP_RB1_BASE, ring->gpu_addr >> 8);
+
+ ring = &adev->gfx.compute_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+ tmp |= BUF_SWAP_32BIT;
+#endif
+ WREG32(mmCP_RB2_CNTL, tmp);
+
+ WREG32(mmCP_RB2_CNTL, tmp | CP_RB2_CNTL__RB_RPTR_WR_ENA_MASK);
+ ring->wptr = 0;
+ WREG32(mmCP_RB2_WPTR, ring->wptr);
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32(mmCP_RB2_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32(mmCP_RB2_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+
+ mdelay(1);
+ WREG32(mmCP_RB2_CNTL, tmp);
+ WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
+
+
+ for (i = 0; i < 2; i++) {
+ r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v6_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v6_0_cp_gfx_enable(adev, enable);
+}
+
+static int gfx_v6_0_cp_load_microcode(struct amdgpu_device *adev)
+{
+ return gfx_v6_0_cp_gfx_load_microcode(adev);
+}
+
+static void gfx_v6_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
+ u32 mask;
+ int i;
+
+ if (enable)
+ tmp |= (CP_INT_CNTL__CNTX_BUSY_INT_ENABLE_MASK |
+ CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE_MASK);
+ else
+ tmp &= ~(CP_INT_CNTL__CNTX_BUSY_INT_ENABLE_MASK |
+ CP_INT_CNTL__CNTX_EMPTY_INT_ENABLE_MASK);
+ WREG32(mmCP_INT_CNTL_RING0, tmp);
+
+ if (!enable) {
+ /* read a gfx register */
+ tmp = RREG32(mmDB_DEPTH_INFO);
+
+ mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if ((RREG32(mmRLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
+ break;
+ udelay(1);
+ }
+ }
+}
+
+static int gfx_v6_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ gfx_v6_0_enable_gui_idle_interrupt(adev, false);
+
+ r = gfx_v6_0_cp_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v6_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ r = gfx_v6_0_cp_compute_resume(adev);
+ if (r)
+ return r;
+
+ gfx_v6_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static void gfx_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
+
+ if (usepfp) {
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for the invalidate to complete */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */
+ WAIT_REG_MEM_ENGINE(0))); /* me */
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* ref */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+
+ if (usepfp) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
+{
+ const u32 *src_ptr;
+ volatile u32 *dst_ptr;
+ u32 dws;
+ u64 reg_list_mc_addr;
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.reg_list = verde_rlc_save_restore_register_list;
+ adev->gfx.rlc.reg_list_size =
+ (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
+
+ adev->gfx.rlc.cs_data = si_cs_data;
+ src_ptr = adev->gfx.rlc.reg_list;
+ dws = adev->gfx.rlc.reg_list_size;
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (src_ptr) {
+ /* init save restore block */
+ r = amdgpu_gfx_rlc_init_sr(adev, dws);
+ if (r)
+ return r;
+ }
+
+ if (cs_data) {
+ /* clear state block */
+ adev->gfx.rlc.clear_state_size = gfx_v6_0_get_csb_size(adev);
+ dws = adev->gfx.rlc.clear_state_size + (256 / 4);
+
+ r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ /* set up the cs buffer */
+ dst_ptr = adev->gfx.rlc.cs_ptr;
+ reg_list_mc_addr = adev->gfx.rlc.clear_state_gpu_addr + 256;
+ dst_ptr[0] = cpu_to_le32(upper_32_bits(reg_list_mc_addr));
+ dst_ptr[1] = cpu_to_le32(lower_32_bits(reg_list_mc_addr));
+ dst_ptr[2] = cpu_to_le32(adev->gfx.rlc.clear_state_size);
+ gfx_v6_0_get_csb_buffer(adev, &dst_ptr[(256/4)]);
+ amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+ }
+
+ return 0;
+}
+
+static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
+{
+ WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
+
+ if (!enable) {
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmSPI_LB_CU_MASK, 0x00ff);
+ }
+}
+
+static void gfx_v6_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmRLC_SERDES_MASTER_BUSY_0) == 0)
+ break;
+ udelay(1);
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmRLC_SERDES_MASTER_BUSY_1) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v6_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmRLC_CNTL);
+ if (tmp != rlc)
+ WREG32(mmRLC_CNTL, rlc);
+}
+
+static u32 gfx_v6_0_halt_rlc(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_CNTL);
+
+ if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
+ data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
+ WREG32(mmRLC_CNTL, data);
+
+ gfx_v6_0_wait_for_rlc_serdes(adev);
+ }
+
+ return orig;
+}
+
+static void gfx_v6_0_rlc_stop(struct amdgpu_device *adev)
+{
+ WREG32(mmRLC_CNTL, 0);
+
+ gfx_v6_0_enable_gui_idle_interrupt(adev, false);
+ gfx_v6_0_wait_for_rlc_serdes(adev);
+}
+
+static void gfx_v6_0_rlc_start(struct amdgpu_device *adev)
+{
+ WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+
+ gfx_v6_0_enable_gui_idle_interrupt(adev, true);
+
+ udelay(50);
+}
+
+static void gfx_v6_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static bool gfx_v6_0_lbpw_supported(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* Enable LBPW only for DDR3 */
+ tmp = RREG32(mmMC_SEQ_MISC0);
+ if ((tmp & 0xF0000000) == 0xB0000000)
+ return true;
+ return false;
+}
+
+static void gfx_v6_0_init_cg(struct amdgpu_device *adev)
+{
+}
+
+static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
+{
+ u32 i;
+ const struct rlc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ adev->gfx.rlc.funcs->stop(adev);
+ adev->gfx.rlc.funcs->reset(adev);
+ gfx_v6_0_init_pg(adev);
+ gfx_v6_0_init_cg(adev);
+
+ WREG32(mmRLC_RL_BASE, 0);
+ WREG32(mmRLC_RL_SIZE, 0);
+ WREG32(mmRLC_LB_CNTL, 0);
+ WREG32(mmRLC_LB_CNTR_MAX, 0xffffffff);
+ WREG32(mmRLC_LB_CNTR_INIT, 0);
+ WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+ WREG32(mmRLC_MC_CNTL, 0);
+ WREG32(mmRLC_UCODE_CNTL, 0);
+
+ hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ fw_data = (const __le32 *)
+ (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ for (i = 0; i < fw_size; i++) {
+ WREG32(mmRLC_UCODE_ADDR, i);
+ WREG32(mmRLC_UCODE_DATA, le32_to_cpup(fw_data++));
+ }
+ WREG32(mmRLC_UCODE_ADDR, 0);
+
+ gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
+ adev->gfx.rlc.funcs->start(adev);
+
+ return 0;
+}
+
+static void gfx_v6_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig, tmp;
+
+ orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ gfx_v6_0_enable_gui_idle_interrupt(adev, true);
+
+ WREG32(mmRLC_GCPM_GENERAL_3, 0x00000080);
+
+ tmp = gfx_v6_0_halt_rlc(adev);
+
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_CTRL, 0x00b000ff);
+
+ gfx_v6_0_wait_for_rlc_serdes(adev);
+ gfx_v6_0_update_rlc(adev, tmp);
+
+ WREG32(mmRLC_SERDES_WR_CTRL, 0x007000ff);
+
+ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ } else {
+ gfx_v6_0_enable_gui_idle_interrupt(adev, false);
+
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ }
+
+ if (orig != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+}
+
+static void gfx_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
+{
+
+ u32 data, orig, tmp = 0;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ orig = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data = 0x96940200;
+ if (orig != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ orig = data = RREG32(mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (orig != data)
+ WREG32(mmCP_MEM_SLP_CNTL, data);
+ }
+
+ orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data &= 0xffffffc0;
+ if (orig != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ tmp = gfx_v6_0_halt_rlc(adev);
+
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_CTRL, 0x00d000ff);
+
+ gfx_v6_0_update_rlc(adev, tmp);
+ } else {
+ orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000003;
+ if (orig != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ data = RREG32(mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32(mmCP_MEM_SLP_CNTL, data);
+ }
+ orig = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data |= CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK | CGTS_SM_CTRL_REG__OVERRIDE_MASK;
+ if (orig != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+
+ tmp = gfx_v6_0_halt_rlc(adev);
+
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_CTRL, 0x00e000ff);
+
+ gfx_v6_0_update_rlc(adev, tmp);
+ }
+}
+/*
+static void gfx_v6_0_update_cg(struct amdgpu_device *adev,
+ bool enable)
+{
+ gfx_v6_0_enable_gui_idle_interrupt(adev, false);
+ if (enable) {
+ gfx_v6_0_enable_mgcg(adev, true);
+ gfx_v6_0_enable_cgcg(adev, true);
+ } else {
+ gfx_v6_0_enable_cgcg(adev, false);
+ gfx_v6_0_enable_mgcg(adev, false);
+ }
+ gfx_v6_0_enable_gui_idle_interrupt(adev, true);
+}
+*/
+
+static void gfx_v6_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void gfx_v6_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void gfx_v6_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
+ data &= ~0x8000;
+ else
+ data |= 0x8000;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v6_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
+{
+}
+/*
+static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ volatile u32 *dst_ptr;
+ int me, i, max_me = 4;
+ u32 bo_offset = 0;
+ u32 table_offset, table_size;
+
+ if (adev->asic_type == CHIP_KAVERI)
+ max_me = 5;
+
+ if (adev->gfx.rlc.cp_table_ptr == NULL)
+ return;
+
+ dst_ptr = adev->gfx.rlc.cp_table_ptr;
+ for (me = 0; me < max_me; me++) {
+ if (me == 0) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 1) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 2) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 3) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec2_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ }
+
+ for (i = 0; i < table_size; i ++) {
+ dst_ptr[bo_offset + i] =
+ cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+ }
+
+ bo_offset += table_size;
+ }
+}
+*/
+static void gfx_v6_0_enable_gfx_cgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ WREG32(mmRLC_TTOP_D, RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10));
+ WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, 1);
+ WREG32_FIELD(RLC_AUTO_PG_CTRL, AUTO_PG_EN, 1);
+ } else {
+ WREG32_FIELD(RLC_AUTO_PG_CTRL, AUTO_PG_EN, 0);
+ (void)RREG32(mmDB_RENDER_CONTROL);
+ }
+}
+
+static void gfx_v6_0_init_ao_cu_mask(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+
+ tmp = RREG32(mmRLC_MAX_PG_CU);
+ tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
+ tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
+ WREG32(mmRLC_MAX_PG_CU, tmp);
+}
+
+static void gfx_v6_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
+ data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v6_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
+ data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v6_0_init_gfx_cgpg(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
+ WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_SRC, 1);
+ WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
+
+ tmp = RREG32(mmRLC_AUTO_PG_CTRL);
+ tmp &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+ tmp |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+ tmp &= ~RLC_AUTO_PG_CTRL__PG_AFTER_GRBM_REG_SAVE_THRESHOLD_MASK;
+ WREG32(mmRLC_AUTO_PG_CTRL, tmp);
+}
+
+static void gfx_v6_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v6_0_enable_gfx_cgpg(adev, enable);
+ gfx_v6_0_enable_gfx_static_mgpg(adev, enable);
+ gfx_v6_0_enable_gfx_dynamic_mgpg(adev, enable);
+}
+
+static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return 0;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+ /* pa_sc_raster_config */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v6_0_enable_sclk_slowdown_on_pu(adev, true);
+ gfx_v6_0_enable_sclk_slowdown_on_pd(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v6_0_init_gfx_cgpg(adev);
+ gfx_v6_0_enable_cp_pg(adev, true);
+ gfx_v6_0_enable_gds_pg(adev, true);
+ } else {
+ WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
+ WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
+
+ }
+ gfx_v6_0_init_ao_cu_mask(adev);
+ gfx_v6_0_update_gfx_pg(adev, true);
+ } else {
+
+ WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
+ WREG32(mmRLC_CLEAR_STATE_RESTORE_BASE, adev->gfx.rlc.clear_state_gpu_addr >> 8);
+ }
+}
+
+static void gfx_v6_0_fini_pg(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v6_0_update_gfx_pg(adev, false);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v6_0_enable_cp_pg(adev, false);
+ gfx_v6_0_enable_gds_pg(adev, false);
+ }
+ }
+}
+
+static uint64_t gfx_v6_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ return clock;
+}
+
+static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ if (flags & AMDGPU_HAVE_CTX_SWITCH)
+ gfx_v6_0_ring_emit_vgt_flush(ring);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0);
+}
+
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32(mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32(mmSQ_IND_DATA);
+}
+
+static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 0 wave data */
+ dst[(*no_fields)++] = 0;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ DRM_INFO("Not implemented\n");
+}
+
+static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v6_0_select_se_sh,
+ .read_wave_data = &gfx_v6_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v6_0_read_wave_sgprs,
+ .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
+};
+
+static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
+ .init = gfx_v6_0_rlc_init,
+ .resume = gfx_v6_0_rlc_resume,
+ .stop = gfx_v6_0_rlc_stop,
+ .reset = gfx_v6_0_rlc_reset,
+ .start = gfx_v6_0_rlc_start
+};
+
+static int gfx_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.xcc_mask = 1;
+ adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ GFX6_NUM_COMPUTE_RINGS);
+ adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
+ adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
+ gfx_v6_0_set_ring_funcs(adev);
+ gfx_v6_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int gfx_v6_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ r = gfx_v6_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load gfx firmware!\n");
+ return r;
+ }
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "gfx");
+ r = amdgpu_ring_init(adev, ring, 2048,
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ unsigned irq_type;
+
+ if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
+ DRM_ERROR("Too many (%d) compute rings!\n", i);
+ break;
+ }
+ ring = &adev->gfx.compute_ring[i];
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ ring->doorbell_index = 0;
+ ring->me = 1;
+ ring->pipe = i;
+ ring->queue = i;
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int gfx_v6_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_rlc_fini(adev);
+
+ return 0;
+}
+
+static int gfx_v6_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v6_0_constants_init(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v6_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+ return r;
+}
+
+static int gfx_v6_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v6_0_cp_enable(adev, false);
+ adev->gfx.rlc.funcs->stop(adev);
+ gfx_v6_0_fini_pg(adev);
+
+ return 0;
+}
+
+static int gfx_v6_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v6_0_hw_fini(adev);
+}
+
+static int gfx_v6_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v6_0_hw_init(adev);
+}
+
+static bool gfx_v6_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v6_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v6_0_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v6_0_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v6_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int ring,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+ switch (state){
+ case AMDGPU_IRQ_STATE_DISABLE:
+ if (ring == 0) {
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING1);
+ cp_int_cntl &= ~CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING1, cp_int_cntl);
+ break;
+ } else {
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING2);
+ cp_int_cntl &= ~CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING2, cp_int_cntl);
+ break;
+
+ }
+ case AMDGPU_IRQ_STATE_ENABLE:
+ if (ring == 0) {
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING1);
+ cp_int_cntl |= CP_INT_CNTL_RING1__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING1, cp_int_cntl);
+ break;
+ } else {
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING2);
+ cp_int_cntl |= CP_INT_CNTL_RING2__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING2, cp_int_cntl);
+ break;
+
+ }
+
+ default:
+ BUG();
+ break;
+
+ }
+}
+
+static int gfx_v6_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v6_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v6_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v6_0_set_gfx_eop_interrupt_state(adev, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v6_0_set_compute_eop_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v6_0_set_compute_eop_interrupt_state(adev, 1, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ break;
+ case 1:
+ case 2:
+ amdgpu_fence_process(&adev->gfx.compute_ring[entry->ring_id - 1]);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v6_0_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_ring *ring;
+
+ switch (entry->ring_id) {
+ case 0:
+ ring = &adev->gfx.gfx_ring[0];
+ break;
+ case 1:
+ case 2:
+ ring = &adev->gfx.compute_ring[entry->ring_id - 1];
+ break;
+ default:
+ return;
+ }
+ drm_sched_fault(&ring->sched);
+}
+
+static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v6_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v6_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ gfx_v6_0_enable_gui_idle_interrupt(adev, false);
+ if (gate) {
+ gfx_v6_0_enable_mgcg(adev, true);
+ gfx_v6_0_enable_cgcg(adev, true);
+ } else {
+ gfx_v6_0_enable_cgcg(adev, false);
+ gfx_v6_0_enable_mgcg(adev, false);
+ }
+ gfx_v6_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static int gfx_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ gate = true;
+
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v6_0_update_gfx_pg(adev, gate);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v6_0_enable_cp_pg(adev, gate);
+ gfx_v6_0_enable_gds_pg(adev, gate);
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
+ .name = "gfx_v6_0",
+ .early_init = gfx_v6_0_early_init,
+ .late_init = NULL,
+ .sw_init = gfx_v6_0_sw_init,
+ .sw_fini = gfx_v6_0_sw_fini,
+ .hw_init = gfx_v6_0_hw_init,
+ .hw_fini = gfx_v6_0_hw_fini,
+ .suspend = gfx_v6_0_suspend,
+ .resume = gfx_v6_0_resume,
+ .is_idle = gfx_v6_0_is_idle,
+ .wait_for_idle = gfx_v6_0_wait_for_idle,
+ .soft_reset = gfx_v6_0_soft_reset,
+ .set_clockgating_state = gfx_v6_0_set_clockgating_state,
+ .set_powergating_state = gfx_v6_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = 0x80000000,
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v6_0_ring_get_rptr,
+ .get_wptr = gfx_v6_0_ring_get_wptr,
+ .set_wptr = gfx_v6_0_ring_set_wptr_gfx,
+ .emit_frame_size =
+ 5 + 5 + /* hdp flush / invalidate */
+ 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
+ 3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+ 5, /* SURFACE_SYNC */
+ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
+ .emit_ib = gfx_v6_0_ring_emit_ib,
+ .emit_fence = gfx_v6_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
+ .test_ring = gfx_v6_0_ring_test_ring,
+ .test_ib = gfx_v6_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
+ .emit_wreg = gfx_v6_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v6_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = 0x80000000,
+ .get_rptr = gfx_v6_0_ring_get_rptr,
+ .get_wptr = gfx_v6_0_ring_get_wptr,
+ .set_wptr = gfx_v6_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 5 + 5 + /* hdp flush / invalidate */
+ 7 + /* gfx_v6_0_ring_emit_pipeline_sync */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
+ 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ 5, /* SURFACE_SYNC */
+ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
+ .emit_ib = gfx_v6_0_ring_emit_ib,
+ .emit_fence = gfx_v6_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
+ .test_ring = gfx_v6_0_ring_test_ring,
+ .test_ib = gfx_v6_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .emit_wreg = gfx_v6_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v6_0_emit_mem_sync,
+};
+
+static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v6_0_ring_funcs_gfx;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v6_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v6_0_eop_irq_funcs = {
+ .set = gfx_v6_0_set_eop_interrupt_state,
+ .process = gfx_v6_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v6_0_priv_reg_irq_funcs = {
+ .set = gfx_v6_0_set_priv_reg_fault_state,
+ .process = gfx_v6_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v6_0_priv_inst_irq_funcs = {
+ .set = gfx_v6_0_set_priv_inst_fault_state,
+ .process = gfx_v6_0_priv_inst_irq,
+};
+
+static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v6_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v6_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v6_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ unsigned disable_masks[4 * 2];
+ u32 ao_cu_num;
+
+ if (adev->flags & AMD_IS_APU)
+ ao_cu_num = 2;
+ else
+ ao_cu_num = adev->gfx.config.max_cu_per_sh;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 4 && j < 2)
+ gfx_v6_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v6_0_get_cu_enabled(adev);
+ cu_info->bitmap[0][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < ao_cu_num)
+ ao_bitmap |= mask;
+ counter ++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+
+ gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+}
+
+const struct amdgpu_ip_block_version gfx_v6_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h
new file mode 100644
index 000000000..ced6fc42f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V6_0_H__
+#define __GFX_V6_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v6_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
new file mode 100644
index 000000000..c2faf6b4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -0,0 +1,5172 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_gfx.h"
+#include "cikd.h"
+#include "cik.h"
+#include "cik_structs.h"
+#include "atom.h"
+#include "amdgpu_ucode.h"
+#include "clearstate_ci.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gca/gfx_7_0_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+#include "gmc/gmc_7_0_d.h"
+#include "gmc/gmc_7_0_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */
+
+#define GFX7_NUM_GFX_RINGS 1
+#define GFX7_MEC_HPD_SIZE 2048
+
+static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
+
+MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kabini_me.bin");
+MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
+MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
+MODULE_FIRMWARE("amdgpu/mullins_me.bin");
+MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
+MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
+MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
+
+static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = {
+ {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
+ {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
+ {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
+ {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
+ {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
+ {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
+ {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
+ {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
+ {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
+ {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
+ {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
+ {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
+ {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
+ {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
+ {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
+ {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
+};
+
+static const u32 spectre_rlc_save_restore_register_list[] = {
+ (0x0e00 << 16) | (0xc12c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc140 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc150 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc15c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc168 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc170 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc178 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc204 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8228 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x829c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x869c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc260 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c000 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c00c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8900 >> 2),
+ 0x00000000,
+ 0x3,
+ (0x0e00 << 16) | (0xc130 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc134 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc208 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc264 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc268 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc26c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc270 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc274 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc278 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc27c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc280 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc284 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc288 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc28c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc290 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc294 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc298 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc29c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x301d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30238 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30250 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30254 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30258 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3025c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc99c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0001 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bf0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30a04 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a10 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a14 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a2c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc704 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc708 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc768 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc770 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc774 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc778 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc77c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc780 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc784 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc788 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc78c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc798 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc79c >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a0 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a4 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7a8 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7ac >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7b0 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc7b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c010 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92cc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x92d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c38 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c3c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9604 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac58 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac68 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac6c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac70 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac74 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac78 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac80 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac84 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac88 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac8c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x970c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9714 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9718 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x971c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x8e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x9e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0xae00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0xbe00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88bc >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8980 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30938 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3093c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30940 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30904 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c210 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c214 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c218 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8904 >> 2),
+ 0x00000000,
+ 0x5,
+ (0x0e00 << 16) | (0x8c28 >> 2),
+ (0x0e00 << 16) | (0x8c2c >> 2),
+ (0x0e00 << 16) | (0x8c30 >> 2),
+ (0x0e00 << 16) | (0x8c34 >> 2),
+ (0x0e00 << 16) | (0x9600 >> 2),
+};
+
+static const u32 kalindi_rlc_save_restore_register_list[] = {
+ (0x0e00 << 16) | (0xc12c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc140 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc150 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc15c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc168 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc170 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc204 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8228 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x829c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x869c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc260 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c000 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c00c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xcd20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8900 >> 2),
+ 0x00000000,
+ 0x3,
+ (0x0e00 << 16) | (0xc130 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc134 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc208 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc264 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc268 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc26c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc270 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc274 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc28c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc290 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc294 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc298 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2a8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc2ac >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x301d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30238 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30250 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30254 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30258 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3025c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc900 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc904 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc908 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc90c >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0xc910 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc99c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f00 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f04 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f08 >> 2),
+ 0x00000000,
+ (0x0000 << 16) | (0x30f0c >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bf0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30a04 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a10 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a14 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a18 >> 2),
+ 0x00000000,
+ (0x0600 << 16) | (0x30a2c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc700 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc704 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc708 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xc768 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc770 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc774 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc798 >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0xc79c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c010 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c38 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8c3c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9604 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac58 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac68 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac6c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac70 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac74 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac78 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac7c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac80 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac84 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac88 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xac8c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x970c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9714 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x9718 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x971c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x4e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x5e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x6e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x7e00 << 16) | (0x31068 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd10 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0xcd14 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88b8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88bc >> 2),
+ 0x00000000,
+ (0x0400 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x88d8 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8980 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30938 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3093c >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30940 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89a0 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30900 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x30904 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x89b4 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3e1fc >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c210 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c214 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x3c218 >> 2),
+ 0x00000000,
+ (0x0e00 << 16) | (0x8904 >> 2),
+ 0x00000000,
+ 0x5,
+ (0x0e00 << 16) | (0x8c28 >> 2),
+ (0x0e00 << 16) | (0x8c2c >> 2),
+ (0x0e00 << 16) | (0x8c30 >> 2),
+ (0x0e00 << 16) | (0x8c34 >> 2),
+ (0x0e00 << 16) | (0x9600 >> 2),
+};
+
+static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
+static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
+
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+}
+
+/*
+ * Core functions
+ */
+/**
+ * gfx_v7_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ chip_name = "bonaire";
+ break;
+ case CHIP_HAWAII:
+ chip_name = "hawaii";
+ break;
+ case CHIP_KAVERI:
+ chip_name = "kaveri";
+ break;
+ case CHIP_KABINI:
+ chip_name = "kabini";
+ break;
+ case CHIP_MULLINS:
+ chip_name = "mullins";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err)
+ goto out;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err)
+ goto out;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ if (err)
+ goto out;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+
+ if (adev->asic_type == CHIP_KAVERI) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ if (err)
+ goto out;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+out:
+ if (err) {
+ pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
+ gfx_v7_0_free_microcode(adev);
+ }
+ return err;
+}
+
+/**
+ * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Starting with SI, the tiling setup is done globally in a
+ * set of 32 tiling modes. Rather than selecting each set of
+ * parameters per surface as on older asics, we just select
+ * which index in the tiling table we want to use, and the
+ * surface uses those parameters (CIK).
+ */
+static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ const u32 num_tile_mode_states =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ const u32 num_secondary_tile_mode_states =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+ u32 reg_offset, split_equal_to_row_size;
+ uint32_t *tile, *macrotile;
+
+ tile = adev->gfx.config.tile_mode_array;
+ macrotile = adev->gfx.config.macrotile_mode_array;
+
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
+ break;
+ case 2:
+ default:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
+ break;
+ case 4:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
+ break;
+ }
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ tile[reg_offset] = 0;
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ macrotile[reg_offset] = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
+ break;
+ case CHIP_HAWAII:
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
+ break;
+ case CHIP_KABINI:
+ case CHIP_KAVERI:
+ case CHIP_MULLINS:
+ default:
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P2));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
+ break;
+ }
+}
+
+/**
+ * gfx_v7_0_select_se_sh - select which SE, SH to address
+ *
+ * @adev: amdgpu_device pointer
+ * @se_num: shader engine to address
+ * @sh_num: sh block to address
+ * @instance: Certain registers are instanced per SE or SH.
+ * 0xffffffff means broadcast to all SEs or SHs (CIK).
+ * @xcc_id: xcc accelerated compute core id
+ * Select which SE, SH combinations to address.
+ */
+static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+ if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
+ data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+ else if (se_num == 0xffffffff)
+ data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
+ (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
+ else if (sh_num == 0xffffffff)
+ data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
+ else
+ data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
+ (se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
+ WREG32(mmGRBM_GFX_INDEX, data);
+}
+
+/**
+ * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculates the bitmask of enabled RBs (CIK).
+ * Returns the enabled RB bitmask.
+ */
+static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_RB_BACKEND_DISABLE);
+ data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void
+gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_HAWAII:
+ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
+ RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
+ PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
+ SE_YSEL(3);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_KAVERI:
+ *rconf |= RB_MAP_PKR0(2);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ *rconf |= 0x0;
+ *rconf1 |= 0x0;
+ break;
+ default:
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ break;
+ }
+}
+
+static void
+gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
+ u32 raster_config, u32 raster_config_1,
+ unsigned rb_mask, unsigned num_rb)
+{
+ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
+ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
+ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
+ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
+ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
+
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= ~SE_PAIR_MAP_MASK;
+
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
+ } else {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config;
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= ~SE_MAP_MASK;
+
+ if (!se_mask[idx]) {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se &= ~PKR_MAP_MASK;
+
+ if (!pkr0_mask) {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR0_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR1_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on CI+ */
+ gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on CI+ */
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+}
+
+/**
+ * gfx_v7_0_setup_rb - setup the RBs on the asic
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Configures per-SE/SH RB registers (CIK).
+ */
+static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 raster_config = 0, raster_config_1 = 0;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ unsigned num_rb_pipes;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v7_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+
+ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines, 16);
+
+ gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
+
+ if (!adev->gfx.config.backend_enable_mask ||
+ adev->gfx.config.num_rbs >= num_rb_pipes) {
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
+ adev->gfx.config.backend_enable_mask,
+ num_rb_pipes);
+ }
+
+ /* cache the values for userspace */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ adev->gfx.config.rb_config[i][j].rb_backend_disable =
+ RREG32(mmCC_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].raster_config =
+ RREG32(mmPA_SC_RASTER_CONFIG);
+ adev->gfx.config.rb_config[i][j].raster_config_1 =
+ RREG32(mmPA_SC_RASTER_CONFIG_1);
+ }
+ }
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+/**
+ * gfx_v7_0_init_compute_vmid - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize compute vmid sh_mem registers
+ *
+ */
+static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+ sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+ sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ cik_srbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, 1);
+ WREG32(mmSH_MEM_APE1_LIMIT, 0);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+ }
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ access. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
+}
+
+static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
+}
+
+static void gfx_v7_0_config_init(struct amdgpu_device *adev)
+{
+ adev->gfx.config.double_offchip_lds_buf = 1;
+}
+
+/**
+ * gfx_v7_0_constants_init - setup the 3D engine
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * init the gfx constants such as the 3D engine, tiling configuration
+ * registers, maximum number of quad pipes, render backends...
+ */
+static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
+ u32 tmp;
+ int i;
+
+ WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
+
+ WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
+
+ gfx_v7_0_tiling_mode_table_init(adev);
+
+ gfx_v7_0_setup_rb(adev);
+ gfx_v7_0_get_cu_info(adev);
+ gfx_v7_0_config_init(adev);
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /*
+ * making sure that the following register writes will be broadcasted
+ * to all the shaders
+ */
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
+ MTYPE_NC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
+ MTYPE_UC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
+
+ sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
+ SWIZZLE_ENABLE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ ELEMENT_SIZE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ INDEX_STRIDE, 3);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
+ if (i == 0)
+ sh_mem_base = 0;
+ else
+ sh_mem_base = adev->gmc.shared_aperture_start >> 48;
+ cik_srbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
+ WREG32(mmSH_MEM_APE1_BASE, 1);
+ WREG32(mmSH_MEM_APE1_LIMIT, 0);
+ WREG32(mmSH_MEM_BASES, sh_mem_base);
+ }
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v7_0_init_compute_vmid(adev);
+ gfx_v7_0_init_gds_vmid(adev);
+
+ WREG32(mmSX_DEBUG_1, 0x20);
+
+ WREG32(mmTA_CNTL_AUX, 0x00010000);
+
+ tmp = RREG32(mmSPI_CONFIG_CNTL);
+ tmp |= 0x03000000;
+ WREG32(mmSPI_CONFIG_CNTL, tmp);
+
+ WREG32(mmSQ_CONFIG, 1);
+
+ WREG32(mmDB_DEBUG, 0);
+
+ tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
+ tmp |= 0x00000400;
+ WREG32(mmDB_DEBUG2, tmp);
+
+ tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
+ tmp |= 0x00020200;
+ WREG32(mmDB_DEBUG3, tmp);
+
+ tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
+ tmp |= 0x00018208;
+ WREG32(mmCB_HW_CONTROL, tmp);
+
+ WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
+
+ WREG32(mmPA_SC_FIFO_SIZE,
+ ((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
+
+ WREG32(mmVGT_NUM_INSTANCES, 1);
+
+ WREG32(mmCP_PERFMON_CNTL, 0);
+
+ WREG32(mmSQ_CONFIG, 0);
+
+ WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
+ ((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
+ (255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
+
+ WREG32(mmVGT_CACHE_INVALIDATION,
+ (VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
+ (ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
+
+ WREG32(mmVGT_GS_VERTEX_REUSE, 16);
+ WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
+
+ WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
+ (3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
+ WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
+
+ tmp = RREG32(mmSPI_ARB_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
+ WREG32(mmSPI_ARB_PRIORITY, tmp);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ udelay(50);
+}
+
+/**
+ * gfx_v7_0_ring_test_ring - basic gfx ring test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Allocate a scratch register and write to it using the gfx ring (CIK).
+ * Provides a basic gfx ring test to verify that the ring is working.
+ * Used by gfx_v7_0_cp_gfx_resume();
+ * Returns 0 on success, error on failure.
+ */
+static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSCRATCH_REG0);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+/**
+ * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Emits an hdp flush on the cp.
+ */
+static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 ref_and_mask;
+ int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ } else {
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
+ WAIT_REG_MEM_FUNCTION(3) | /* == */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
+ amdgpu_ring_write(ring, ref_and_mask);
+ amdgpu_ring_write(ring, ref_and_mask);
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+}
+
+static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
+ EVENT_INDEX(4));
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+ EVENT_INDEX(0));
+}
+
+/**
+ * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Emits a fence sequence number on the gfx ring and flushes
+ * GPU caches.
+ */
+static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ /* Workaround for cache flush problems. First send a dummy EOP
+ * event down the pipe with seq one below.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(1) | INT_SEL(0));
+ amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+ amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+ /* Then send the real EOP event down the pipe. */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+}
+
+/**
+ * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Emits a fence sequence number on the compute ring and flushes
+ * GPU caches.
+ */
+static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
+ u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+}
+
+/*
+ * IB stuff
+ */
+/**
+ * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @job: job to retrieve vmid from
+ * @ib: amdgpu indirect buffer object
+ * @flags: options (AMDGPU_HAVE_CTX_SWITCH)
+ *
+ * Emits an DE (drawing engine) or CE (constant engine) IB
+ * on the gfx ring. IBs are usually generated by userspace
+ * acceleration drivers and submitted to the kernel for
+ * scheduling on the ring. This function schedules the IB
+ * on the gfx ring for execution by the GPU.
+ */
+static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ (ib->gpu_addr & 0xFFFFFFFC));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ (ib->gpu_addr & 0xFFFFFFFC));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ gfx_v7_0_ring_emit_vgt_flush(ring);
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * gfx_v7_0_ring_test_ib - basic ring IB test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Allocate an IB and execute it on the gfx ring (CIK).
+ * Provides a basic gfx ring test to verify that IBs are working.
+ * Returns 0 on success, error on failure.
+ */
+static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ uint32_t tmp = 0;
+ long r;
+
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ return r;
+
+ ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
+ ib.ptr[2] = 0xDEADBEEF;
+ ib.length_dw = 3;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ }
+ tmp = RREG32(mmSCRATCH_REG0);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+error:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+ return r;
+}
+
+/*
+ * CP.
+ * On CIK, gfx and compute now have independent command processors.
+ *
+ * GFX
+ * Gfx consists of a single ring and can process both gfx jobs and
+ * compute jobs. The gfx CP consists of three microengines (ME):
+ * PFP - Pre-Fetch Parser
+ * ME - Micro Engine
+ * CE - Constant Engine
+ * The PFP and ME make up what is considered the Drawing Engine (DE).
+ * The CE is an asynchronous engine used for updating buffer desciptors
+ * used by the DE so that they can be loaded into cache in parallel
+ * while the DE is processing state update packets.
+ *
+ * Compute
+ * The compute CP consists of two microengines (ME):
+ * MEC1 - Compute MicroEngine 1
+ * MEC2 - Compute MicroEngine 2
+ * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
+ * The queues are exposed to userspace and are programmed directly
+ * by the compute runtime.
+ */
+/**
+ * gfx_v7_0_cp_gfx_enable - enable/disable the gfx CP MEs
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable the MEs
+ *
+ * Halts or unhalts the gfx MEs.
+ */
+static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32(mmCP_ME_CNTL, 0);
+ else
+ WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
+ CP_ME_CNTL__PFP_HALT_MASK |
+ CP_ME_CNTL__CE_HALT_MASK));
+ udelay(50);
+}
+
+/**
+ * gfx_v7_0_cp_gfx_load_microcode - load the gfx CP ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the gfx PFP, ME, and CE ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+ adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
+ adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
+ adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
+ adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
+
+ gfx_v7_0_cp_gfx_enable(adev, false);
+
+ /* PFP */
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_PFP_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ /* CE */
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_CE_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
+
+ /* ME */
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_ME_RAM_WADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+/**
+ * gfx_v7_0_cp_gfx_start - start the gfx ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enables the ring and loads the clear state context and other
+ * packets required to init the ring.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+
+ /* init the CP */
+ WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
+ WREG32(mmCP_ENDIAN_SWAP, 0);
+ WREG32(mmCP_DEVICE_ID, 1);
+
+ gfx_v7_0_cp_gfx_enable(adev, true);
+
+ r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ /* init the CE partitions. CE only used for gfx on CIK */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ /* clear state buffer */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
+ amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
+ amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ amdgpu_ring_write(ring, 0x00000316);
+ amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+ amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+/**
+ * gfx_v7_0_cp_gfx_resume - setup the gfx ring buffer registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the location and size of the gfx ring buffer
+ * and test it to make sure it's working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr;
+ int r;
+
+ WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
+ if (adev->asic_type != CHIP_HAWAII)
+ WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
+
+ /* Set the write pointer delay */
+ WREG32(mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32(mmCP_RB_VMID, 0);
+
+ WREG32(mmSCRATCH_ADDR, 0);
+
+ /* ring 0 - compute and gfx */
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+ tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
+#endif
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
+ ring->wptr = 0;
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+
+ /* scratch register shadowing is no longer supported */
+ WREG32(mmSCRATCH_UMSK, 0);
+
+ mdelay(1);
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32(mmCP_RB0_BASE, rb_addr);
+ WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ /* start the ring */
+ gfx_v7_0_cp_gfx_start(adev);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmCP_RB0_WPTR);
+}
+
+static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ (void)RREG32(mmCP_RB0_WPTR);
+}
+
+static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ /* XXX check if swapping is necessary on BE */
+ return *ring->wptr_cpu_addr;
+}
+
+static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/**
+ * gfx_v7_0_cp_compute_enable - enable/disable the compute CP MEs
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable the MEs
+ *
+ * Halts or unhalts the compute MEs.
+ */
+static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32(mmCP_MEC_CNTL, 0);
+ else
+ WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ udelay(50);
+}
+
+/**
+ * gfx_v7_0_cp_compute_load_microcode - load the compute CP ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the compute MEC1&2 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+ adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version = le32_to_cpu(
+ mec_hdr->ucode_feature_version);
+
+ gfx_v7_0_cp_compute_enable(adev, false);
+
+ /* MEC1 */
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
+
+ if (adev->asic_type == CHIP_KAVERI) {
+ const struct gfx_firmware_header_v1_0 *mec2_hdr;
+
+ if (!adev->gfx.mec2_fw)
+ return -EINVAL;
+
+ mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
+ adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version = le32_to_cpu(
+ mec2_hdr->ucode_feature_version);
+
+ /* MEC2 */
+ fw_data = (const __le32 *)
+ (adev->gfx.mec2_fw->data +
+ le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * gfx_v7_0_cp_compute_fini - stop the compute queues
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute queues and tear down the driver queue
+ * info.
+ */
+static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
+ }
+}
+
+static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+}
+
+static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+
+ /* allocate space for ALL pipes (even the ones we don't own) */
+ mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+ * GFX7_MEC_HPD_SIZE * 2;
+
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
+ gfx_v7_0_mec_fini(adev);
+ return r;
+ }
+
+ /* clear memory. Not sure if this is required or not */
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+ return 0;
+}
+
+struct hqd_registers {
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+};
+
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
+ int mec, int pipe)
+{
+ u64 eop_gpu_addr;
+ u32 tmp;
+ size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
+ * GFX7_MEC_HPD_SIZE * 2;
+
+ mutex_lock(&adev->srbm_mutex);
+ eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
+
+ cik_srbm_select(adev, mec + 1, pipe, 0, 0);
+
+ /* write the EOP addr */
+ WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+ WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+
+ /* set the VMID assigned */
+ WREG32(mmCP_HPD_EOP_VMID, 0);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+ tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+ tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+ WREG32(mmCP_HPD_EOP_CONTROL, tmp);
+
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* disable the queue if it's active */
+ if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (i == adev->usec_timeout)
+ return -ETIMEDOUT;
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+ WREG32(mmCP_HQD_PQ_RPTR, 0);
+ WREG32(mmCP_HQD_PQ_WPTR, 0);
+ }
+
+ return 0;
+}
+
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+ struct cik_mqd *mqd,
+ uint64_t mqd_gpu_addr,
+ struct amdgpu_ring *ring)
+{
+ u64 hqd_gpu_addr;
+ u64 wb_gpu_addr;
+
+ /* init the mqd struct */
+ memset(mqd, 0, sizeof(struct cik_mqd));
+
+ mqd->header = 0xC0310800;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+
+ /* enable doorbell? */
+ mqd->cp_hqd_pq_doorbell_control =
+ RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+ if (ring->use_doorbell)
+ mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+ else
+ mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+ mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+ mqd->cp_hqd_pq_control &=
+ ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+ CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+ mqd->cp_hqd_pq_control |=
+ order_base_2(ring->ring_size / 8);
+ mqd->cp_hqd_pq_control |=
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+#ifdef __BIG_ENDIAN
+ mqd->cp_hqd_pq_control |=
+ 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+#endif
+ mqd->cp_hqd_pq_control &=
+ ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+ CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
+ CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
+ mqd->cp_hqd_pq_control |=
+ CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+ CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set the wb address wether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ mqd->cp_hqd_pq_doorbell_control =
+ RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+ mqd->cp_hqd_pq_doorbell_control &=
+ ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+ mqd->cp_hqd_pq_doorbell_control |=
+ (ring->doorbell_index <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+ mqd->cp_hqd_pq_doorbell_control |=
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+ mqd->cp_hqd_pq_doorbell_control &=
+ ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+ } else {
+ mqd->cp_hqd_pq_doorbell_control = 0;
+ }
+
+ /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ /* defaults */
+ mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+ mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+ mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+ mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+ mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+ mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+ mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+ mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+ mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+ mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+ mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+ mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+ mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+ mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+}
+
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+{
+ uint32_t tmp;
+ uint32_t mqd_reg;
+ uint32_t *mqd_data;
+
+ /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+ mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+ /* disable wptr polling */
+ tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+ /* program all HQD registers */
+ for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ /* activate the HQD */
+ for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+ int r;
+ u64 mqd_gpu_addr;
+ struct cik_mqd *mqd;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &mqd_gpu_addr, (void **)&mqd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+ gfx_v7_0_mqd_deactivate(adev);
+ gfx_v7_0_mqd_commit(adev, mqd);
+
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+ int r, i, j;
+ u32 tmp;
+ struct amdgpu_ring *ring;
+
+ /* fix up chicken bits */
+ tmp = RREG32(mmCP_CPF_DEBUG);
+ tmp |= (1 << 23);
+ WREG32(mmCP_CPF_DEBUG, tmp);
+
+ /* init all pipes (even the ones we don't own) */
+ for (i = 0; i < adev->gfx.mec.num_mec; i++)
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+ gfx_v7_0_compute_pipe_init(adev, i, j);
+
+ /* init the queues */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v7_0_compute_queue_init(adev, i);
+ if (r) {
+ gfx_v7_0_cp_compute_fini(adev);
+ return r;
+ }
+ }
+
+ gfx_v7_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ amdgpu_ring_test_helper(ring);
+ }
+
+ return 0;
+}
+
+static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v7_0_cp_gfx_enable(adev, enable);
+ gfx_v7_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = gfx_v7_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+ r = gfx_v7_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
+
+ if (enable)
+ tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
+ else
+ tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
+ WREG32(mmCP_INT_CNTL_RING0, tmp);
+}
+
+static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, false);
+
+ r = gfx_v7_0_cp_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v7_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ r = gfx_v7_0_cp_compute_resume(adev);
+ if (r)
+ return r;
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+/**
+ * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP
+ *
+ * @ring: the ring to emit the commands to
+ *
+ * Sync the command pipeline with the PFP. E.g. wait for everything
+ * to be completed.
+ */
+static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
+
+ if (usepfp) {
+ /* sync CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+/*
+ * vm
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+/**
+ * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using the CP (CIK).
+ */
+static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for the invalidate to complete */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+ WAIT_REG_MEM_FUNCTION(0) | /* always */
+ WAIT_REG_MEM_ENGINE(0))); /* me */
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* ref */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+
+ /* compute doesn't have PFP */
+ if (usepfp) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+/*
+ * RLC
+ * The RLC is a multi-purpose microengine that handles a
+ * variety of functions.
+ */
+static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
+{
+ const u32 *src_ptr;
+ u32 dws;
+ const struct cs_section_def *cs_data;
+ int r;
+
+ /* allocate rlc buffers */
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->asic_type == CHIP_KAVERI) {
+ adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
+ adev->gfx.rlc.reg_list_size =
+ (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
+ } else {
+ adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
+ adev->gfx.rlc.reg_list_size =
+ (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
+ }
+ }
+ adev->gfx.rlc.cs_data = ci_cs_data;
+ adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+ adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
+
+ src_ptr = adev->gfx.rlc.reg_list;
+ dws = adev->gfx.rlc.reg_list_size;
+ dws += (5 * 16) + 48 + 48 + 64;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (src_ptr) {
+ /* init save restore block */
+ r = amdgpu_gfx_rlc_init_sr(adev, dws);
+ if (r)
+ return r;
+ }
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.rlc.cp_table_size) {
+ r = amdgpu_gfx_rlc_init_cpt(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
+ return 0;
+}
+
+static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmRLC_LB_CNTL);
+ if (enable)
+ tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
+ else
+ tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
+ WREG32(mmRLC_LB_CNTL, tmp);
+}
+
+static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ }
+ }
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmRLC_CNTL);
+ if (tmp != rlc)
+ WREG32(mmRLC_CNTL, rlc);
+}
+
+static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_CNTL);
+
+ if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
+ u32 i;
+
+ data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
+ WREG32(mmRLC_CNTL, data);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
+ break;
+ udelay(1);
+ }
+
+ gfx_v7_0_wait_for_rlc_serdes(adev);
+ }
+
+ return orig;
+}
+
+static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ return true;
+}
+
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ u32 tmp, i, mask;
+
+ tmp = 0x1 | (1 << 1);
+ WREG32(mmRLC_GPR_REG2, tmp);
+
+ mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
+ RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
+ break;
+ udelay(1);
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ u32 tmp;
+
+ tmp = 0x1 | (0 << 1);
+ WREG32(mmRLC_GPR_REG2, tmp);
+}
+
+/**
+ * gfx_v7_0_rlc_stop - stop the RLC ME
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Halt the RLC ME (MicroEngine) (CIK).
+ */
+static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
+{
+ WREG32(mmRLC_CNTL, 0);
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, false);
+
+ gfx_v7_0_wait_for_rlc_serdes(adev);
+}
+
+/**
+ * gfx_v7_0_rlc_start - start the RLC ME
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Unhalt the RLC ME (MicroEngine) (CIK).
+ */
+static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
+{
+ WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+
+ udelay(50);
+}
+
+static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmGRBM_SOFT_RESET);
+
+ tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ udelay(50);
+ tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ udelay(50);
+}
+
+/**
+ * gfx_v7_0_rlc_resume - setup the RLC hw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the RLC registers, load the ucode,
+ * and start the RLC (CIK).
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+ adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(
+ hdr->ucode_feature_version);
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
+ WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
+
+ adev->gfx.rlc.funcs->reset(adev);
+
+ gfx_v7_0_init_pg(adev);
+
+ WREG32(mmRLC_LB_CNTR_INIT, 0);
+ WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+ WREG32(mmRLC_LB_PARAMS, 0x00600408);
+ WREG32(mmRLC_LB_CNTL, 0x80000004);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32(mmRLC_MC_CNTL, 0);
+ WREG32(mmRLC_UCODE_CNTL, 0);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ WREG32(mmRLC_GPM_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ /* XXX - find out what chips support lbpw */
+ gfx_v7_0_enable_lbpw(adev, false);
+
+ if (adev->asic_type == CHIP_BONAIRE)
+ WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
+
+ adev->gfx.rlc.funcs->start(adev);
+
+ return 0;
+}
+
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ data = RREG32(mmRLC_SPM_VMID);
+
+ data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
+
+ WREG32(mmRLC_SPM_VMID, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig, tmp, tmp2;
+
+ orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+
+ tmp = gfx_v7_0_halt_rlc(adev);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
+ RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
+ RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
+ WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v7_0_update_rlc(adev, tmp);
+
+ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (orig != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+ } else {
+ gfx_v7_0_enable_gui_idle_interrupt(adev, false);
+
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ if (orig != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+ }
+}
+
+static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig, tmp = 0;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ orig = data = RREG32(mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (orig != data)
+ WREG32(mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+ orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000001;
+ data &= 0xfffffffd;
+ if (orig != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ tmp = gfx_v7_0_halt_rlc(adev);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
+ RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
+ WREG32(mmRLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v7_0_update_rlc(adev, tmp);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
+ orig = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
+ data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
+ data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
+ data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
+ if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
+ data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
+ data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
+ data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
+ data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
+ if (orig != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+ }
+ } else {
+ orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= 0x00000003;
+ if (orig != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ data = RREG32(mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32(mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ data = RREG32(mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32(mmCP_MEM_SLP_CNTL, data);
+ }
+
+ orig = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
+ if (orig != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+
+ tmp = gfx_v7_0_halt_rlc(adev);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+ data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
+ WREG32(mmRLC_SERDES_WR_CTRL, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v7_0_update_rlc(adev, tmp);
+ }
+}
+
+static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
+ bool enable)
+{
+ gfx_v7_0_enable_gui_idle_interrupt(adev, false);
+ /* order matters! */
+ if (enable) {
+ gfx_v7_0_enable_mgcg(adev, true);
+ gfx_v7_0_enable_cgcg(adev, true);
+ } else {
+ gfx_v7_0_enable_cgcg(adev, false);
+ gfx_v7_0_enable_mgcg(adev, false);
+ }
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+}
+
+static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
+ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
+ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
+ data &= ~0x8000;
+ else
+ data |= 0x8000;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
+ data &= ~0x2000;
+ else
+ data |= 0x2000;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_KAVERI)
+ return 5;
+ else
+ return 4;
+}
+
+static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+
+ orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
+ data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
+ if (orig != data)
+ WREG32(mmRLC_AUTO_PG_CTRL, data);
+ } else {
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+
+ orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
+ data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
+ if (orig != data)
+ WREG32(mmRLC_AUTO_PG_CTRL, data);
+
+ data = RREG32(mmDB_RENDER_CONTROL);
+ }
+}
+
+static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+
+ tmp = RREG32(mmRLC_MAX_PG_CU);
+ tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
+ tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
+ WREG32(mmRLC_MAX_PG_CU, tmp);
+}
+
+static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
+ data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
+ data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
+
+static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+ u32 i;
+
+ if (adev->gfx.rlc.cs_data) {
+ WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+ WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
+ WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
+ WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
+ } else {
+ WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+ for (i = 0; i < 3; i++)
+ WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
+ }
+ if (adev->gfx.rlc.reg_list) {
+ WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
+ for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
+ WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
+ }
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+
+ WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
+ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+
+ data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
+ data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+ data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
+
+ data = 0x10101010;
+ WREG32(mmRLC_PG_DELAY, data);
+
+ data = RREG32(mmRLC_PG_DELAY_2);
+ data &= ~0xff;
+ data |= 0x3;
+ WREG32(mmRLC_PG_DELAY_2, data);
+
+ data = RREG32(mmRLC_AUTO_PG_CTRL);
+ data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+ WREG32(mmRLC_AUTO_PG_CTRL, data);
+
+}
+
+static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v7_0_enable_gfx_cgpg(adev, enable);
+ gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
+ gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
+}
+
+static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return 0;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+ /* pa_sc_raster_config/pa_sc_raster_config1 */
+ count += 4;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ buffer[count++] = cpu_to_le32(0x16000012);
+ buffer[count++] = cpu_to_le32(0x00000000);
+ break;
+ case CHIP_KAVERI:
+ buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
+ buffer[count++] = cpu_to_le32(0x00000000);
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
+ buffer[count++] = cpu_to_le32(0x00000000);
+ break;
+ case CHIP_HAWAII:
+ buffer[count++] = cpu_to_le32(0x3a00161a);
+ buffer[count++] = cpu_to_le32(0x0000002e);
+ break;
+ default:
+ buffer[count++] = cpu_to_le32(0x00000000);
+ buffer[count++] = cpu_to_le32(0x00000000);
+ break;
+ }
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
+ gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v7_0_init_gfx_cgpg(adev);
+ gfx_v7_0_enable_cp_pg(adev, true);
+ gfx_v7_0_enable_gds_pg(adev, true);
+ }
+ gfx_v7_0_init_ao_cu_mask(adev);
+ gfx_v7_0_update_gfx_pg(adev, true);
+ }
+}
+
+static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v7_0_update_gfx_pg(adev, false);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v7_0_enable_cp_pg(adev, false);
+ gfx_v7_0_enable_gds_pg(adev, false);
+ }
+ }
+}
+
+/**
+ * gfx_v7_0_get_gpu_clock_counter - return GPU clock counter snapshot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetches a GPU clock counter snapshot (SI).
+ * Returns the 64 bit clock counter snapshot.
+ */
+static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ return clock;
+}
+
+static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ /* GDS Base */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gds_base);
+
+ /* GDS Size */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gds_size);
+
+ /* GWS */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32(mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32(mmSQ_IND_DATA);
+}
+
+static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 0 wave data */
+ dst[(*no_fields)++] = 0;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ cik_srbm_select(adev, me, pipe, q, vm);
+}
+
+static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v7_0_select_se_sh,
+ .read_wave_data = &gfx_v7_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
+ .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
+};
+
+static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v7_0_set_safe_mode,
+ .unset_safe_mode = gfx_v7_0_unset_safe_mode,
+ .init = gfx_v7_0_rlc_init,
+ .get_csb_size = gfx_v7_0_get_csb_size,
+ .get_csb_buffer = gfx_v7_0_get_csb_buffer,
+ .get_cp_table_num = gfx_v7_0_cp_pg_table_num,
+ .resume = gfx_v7_0_rlc_resume,
+ .stop = gfx_v7_0_rlc_stop,
+ .reset = gfx_v7_0_rlc_reset,
+ .start = gfx_v7_0_rlc_start,
+ .update_spm_vmid = gfx_v7_0_update_spm_vmid
+};
+
+static int gfx_v7_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.xcc_mask = 1;
+ adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
+ adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
+ gfx_v7_0_set_ring_funcs(adev);
+ gfx_v7_0_set_irq_funcs(adev);
+ gfx_v7_0_set_gds_init(adev);
+
+ return 0;
+}
+
+static int gfx_v7_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+ u32 mc_arb_ramcfg;
+ u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+ u32 tmp;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ adev->gfx.config.max_shader_engines = 2;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 7;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_HAWAII:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 16;
+ adev->gfx.config.max_cu_per_sh = 11;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 16;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_KAVERI:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ default:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 2;
+ adev->gfx.config.max_cu_per_sh = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 1;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ }
+
+ adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+ mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+ adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
+ adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
+ adev->gfx.config.mem_max_burst_length_bytes = 256;
+ if (adev->flags & AMD_IS_APU) {
+ /* Get memory bank mapping mode. */
+ tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
+ dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
+ dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ /* Validate settings in case only one DIMM installed. */
+ if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
+ dimm00_addr_map = 0;
+ if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
+ dimm01_addr_map = 0;
+ if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
+ dimm10_addr_map = 0;
+ if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
+ dimm11_addr_map = 0;
+
+ /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
+ /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
+ if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
+ adev->gfx.config.mem_row_size_in_kb = 2;
+ else
+ adev->gfx.config.mem_row_size_in_kb = 1;
+ } else {
+ tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
+ adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+ if (adev->gfx.config.mem_row_size_in_kb > 4)
+ adev->gfx.config.mem_row_size_in_kb = 4;
+ }
+ /* XXX use MC settings? */
+ adev->gfx.config.shader_engine_tile_size = 32;
+ adev->gfx.config.num_gpus = 1;
+ adev->gfx.config.multi_gpu_tile_size = 64;
+
+ /* fix up row size */
+ gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ default:
+ gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ case 2:
+ gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ case 4:
+ gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ }
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+}
+
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+
+ return 0;
+}
+
+static int gfx_v7_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, j, k, r, ring_id;
+
+ switch (adev->asic_type) {
+ case CHIP_KAVERI:
+ adev->gfx.mec.num_mec = 2;
+ break;
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ default:
+ adev->gfx.mec.num_mec = 1;
+ break;
+ }
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ r = gfx_v7_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load gfx firmware!\n");
+ return r;
+ }
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ /* allocate mec buffers */
+ r = gfx_v7_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "gfx");
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v7_0_compute_ring_init(adev,
+ ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+ gfx_v7_0_gpu_early_init(adev);
+
+ return r;
+}
+
+static int gfx_v7_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ gfx_v7_0_cp_compute_fini(adev);
+ amdgpu_gfx_rlc_fini(adev);
+ gfx_v7_0_mec_fini(adev);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if (adev->gfx.rlc.cp_table_size) {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ }
+ gfx_v7_0_free_microcode(adev);
+
+ return 0;
+}
+
+static int gfx_v7_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v7_0_constants_init(adev);
+
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+ /* init rlc */
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v7_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v7_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ gfx_v7_0_cp_enable(adev, false);
+ adev->gfx.rlc.funcs->stop(adev);
+ gfx_v7_0_fini_pg(adev);
+
+ return 0;
+}
+
+static int gfx_v7_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v7_0_hw_fini(adev);
+}
+
+static int gfx_v7_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gfx_v7_0_hw_init(adev);
+}
+
+static bool gfx_v7_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v7_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v7_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32(mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
+ grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
+ GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32(mmGRBM_STATUS2);
+ if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
+ grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
+
+ /* SRBM_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ /* disable CG/PG */
+ gfx_v7_0_fini_pg(adev);
+ gfx_v7_0_update_cg(adev, false);
+
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* Disable GFX parsing/prefetching */
+ WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
+
+ /* Disable MEC parsing/prefetching */
+ WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ }
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ }
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
+ break;
+ case 1:
+ mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+ break;
+ case 2:
+ mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+ break;
+ case 3:
+ mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
+ cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
+ WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ switch (me_id) {
+ case 0:
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if ((ring->me == me_id) && (ring->pipe == pipe_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v7_0_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_ring *ring;
+ u8 me_id, pipe_id;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ switch (me_id) {
+ case 0:
+ drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if ((ring->me == me_id) && (ring->pipe == pipe_id))
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v7_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ // XXX soft reset the gfx block only
+ gfx_v7_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v7_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ gfx_v7_0_enable_gui_idle_interrupt(adev, false);
+ /* order matters! */
+ if (gate) {
+ gfx_v7_0_enable_mgcg(adev, true);
+ gfx_v7_0_enable_cgcg(adev, true);
+ } else {
+ gfx_v7_0_enable_cgcg(adev, false);
+ gfx_v7_0_enable_mgcg(adev, false);
+ }
+ gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static int gfx_v7_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ gate = true;
+
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ gfx_v7_0_update_gfx_pg(adev, gate);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
+ gfx_v7_0_enable_cp_pg(adev, gate);
+ gfx_v7_0_enable_gds_pg(adev, gate);
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
+ .name = "gfx_v7_0",
+ .early_init = gfx_v7_0_early_init,
+ .late_init = gfx_v7_0_late_init,
+ .sw_init = gfx_v7_0_sw_init,
+ .sw_fini = gfx_v7_0_sw_fini,
+ .hw_init = gfx_v7_0_hw_init,
+ .hw_fini = gfx_v7_0_hw_fini,
+ .suspend = gfx_v7_0_suspend,
+ .resume = gfx_v7_0_resume,
+ .is_idle = gfx_v7_0_is_idle,
+ .wait_for_idle = gfx_v7_0_wait_for_idle,
+ .soft_reset = gfx_v7_0_soft_reset,
+ .set_clockgating_state = gfx_v7_0_set_clockgating_state,
+ .set_powergating_state = gfx_v7_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v7_0_ring_get_rptr,
+ .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
+ .emit_frame_size =
+ 20 + /* gfx_v7_0_ring_emit_gds_switch */
+ 7 + /* gfx_v7_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
+ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
+ 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 5, /* SURFACE_SYNC */
+ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v7_0_ring_test_ring,
+ .test_ib = gfx_v7_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
+ .emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v7_0_ring_get_rptr,
+ .get_wptr = gfx_v7_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v7_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v7_0_ring_emit_gds_switch */
+ 7 + /* gfx_v7_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
+ 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7, /* gfx_v7_0_emit_mem_sync_compute */
+ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v7_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v7_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v7_0_ring_test_ring,
+ .test_ib = gfx_v7_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
+};
+
+static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
+ .set = gfx_v7_0_set_eop_interrupt_state,
+ .process = gfx_v7_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
+ .set = gfx_v7_0_set_priv_reg_fault_state,
+ .process = gfx_v7_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
+ .set = gfx_v7_0_set_priv_inst_fault_state,
+ .process = gfx_v7_0_priv_inst_irq,
+};
+
+static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
+}
+
+
+static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ unsigned disable_masks[4 * 2];
+ u32 ao_cu_num;
+
+ if (adev->flags & AMD_IS_APU)
+ ao_cu_num = 2;
+ else
+ ao_cu_num = adev->gfx.config.max_cu_per_sh;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 4 && j < 2)
+ gfx_v7_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
+ cu_info->bitmap[0][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < ao_cu_num)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+ cu_info->max_waves_per_simd = 10;
+ cu_info->max_scratch_slots_per_cu = 32;
+ cu_info->wave_front_size = 64;
+ cu_info->lds_size = 64;
+}
+
+const struct amdgpu_ip_block_version gfx_v7_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 7,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &gfx_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v7_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 7,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &gfx_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v7_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 7,
+ .minor = 3,
+ .rev = 0,
+ .funcs = &gfx_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
new file mode 100644
index 000000000..eedce7d00
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V7_0_H__
+#define __GFX_V7_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
+
+struct amdgpu_device;
+struct cik_mqd;
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
new file mode 100644
index 000000000..1943beb13
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -0,0 +1,7230 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_ring.h"
+#include "vi.h"
+#include "vi_structs.h"
+#include "vid.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
+#include "clearstate_vi.h"
+
+#include "gmc/gmc_8_2_d.h"
+#include "gmc/gmc_8_2_sh_mask.h"
+
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
+#include "smu/smu_7_1_3_d.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+#define GFX8_NUM_GFX_RINGS 1
+#define GFX8_MEC_HPD_SIZE 4096
+
+#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
+#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
+#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
+#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
+
+#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
+#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
+#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
+#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
+#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
+#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
+#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
+#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
+#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
+
+#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
+#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
+#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
+#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
+#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
+#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
+
+/* BPM SERDES CMD */
+#define SET_BPM_SERDES_CMD 1
+#define CLE_BPM_SERDES_CMD 0
+
+/* BPM Register Address*/
+enum {
+ BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
+ BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
+ BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
+ BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
+ BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
+ BPM_REG_FGCG_MAX
+};
+
+#define RLC_FormatDirectRegListLength 14
+
+MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
+MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
+MODULE_FIRMWARE("amdgpu/stoney_me.bin");
+MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
+MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
+MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
+MODULE_FIRMWARE("amdgpu/tonga_me.bin");
+MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
+MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
+MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
+MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
+MODULE_FIRMWARE("amdgpu/topaz_me.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
+MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
+MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
+MODULE_FIRMWARE("amdgpu/fiji_me.bin");
+MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
+MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
+MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
+MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vegam_me.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
+
+static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
+{
+ {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
+ {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
+ {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
+ {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
+ {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
+ {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
+ {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
+ {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
+ {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
+ {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
+ {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
+ {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
+ {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
+ {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
+ {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
+ {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
+};
+
+static const u32 golden_settings_tonga_a11[] =
+{
+ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+ mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmGB_GPU_ID, 0x0000000f, 0x00000000,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 tonga_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
+};
+
+static const u32 tonga_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+};
+
+static const u32 golden_settings_vegam_a11[] =
+{
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 vegam_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
+static const u32 golden_settings_polaris11_a11[] =
+{
+ mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 polaris11_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
+static const u32 golden_settings_polaris10_a11[] =
+{
+ mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+ mmSQ_CONFIG, 0x07f80000, 0x07180000,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 polaris10_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
+static const u32 fiji_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
+};
+
+static const u32 golden_settings_fiji_a10[] =
+{
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+};
+
+static const u32 golden_settings_iceland_a11[] =
+{
+ mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0xc0000000, 0xc0000000,
+ mmGB_GPU_ID, 0x0000000f, 0x00000000,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
+};
+
+static const u32 iceland_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
+ mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
+ mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+};
+
+static const u32 cz_golden_settings_a11[] =
+{
+ mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmGB_GPU_ID, 0x0000000f, 0x00000000,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
+};
+
+static const u32 cz_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
+};
+
+static const u32 cz_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+};
+
+static const u32 stoney_golden_settings_a11[] =
+{
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmGB_GPU_ID, 0x0000000f, 0x00000000,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
+};
+
+static const u32 stoney_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
+static const u32 stoney_mgcg_cgcg_init[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
+ mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+};
+
+
+static const char * const sq_edc_source_names[] = {
+ "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
+ "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
+ "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
+ "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
+ "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
+ "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
+ "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
+};
+
+static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
+static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
+static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+
+#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK 0x0000007fL
+#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT 0x00000000L
+
+static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ amdgpu_device_program_register_sequence(adev,
+ iceland_mgcg_cgcg_init,
+ ARRAY_SIZE(iceland_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_iceland_a11,
+ ARRAY_SIZE(golden_settings_iceland_a11));
+ amdgpu_device_program_register_sequence(adev,
+ iceland_golden_common_all,
+ ARRAY_SIZE(iceland_golden_common_all));
+ break;
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_fiji_a10,
+ ARRAY_SIZE(golden_settings_fiji_a10));
+ amdgpu_device_program_register_sequence(adev,
+ fiji_golden_common_all,
+ ARRAY_SIZE(fiji_golden_common_all));
+ break;
+
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_tonga_a11,
+ ARRAY_SIZE(golden_settings_tonga_a11));
+ amdgpu_device_program_register_sequence(adev,
+ tonga_golden_common_all,
+ ARRAY_SIZE(tonga_golden_common_all));
+ break;
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_vegam_a11,
+ ARRAY_SIZE(golden_settings_vegam_a11));
+ amdgpu_device_program_register_sequence(adev,
+ vegam_golden_common_all,
+ ARRAY_SIZE(vegam_golden_common_all));
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris11_a11,
+ ARRAY_SIZE(golden_settings_polaris11_a11));
+ amdgpu_device_program_register_sequence(adev,
+ polaris11_golden_common_all,
+ ARRAY_SIZE(polaris11_golden_common_all));
+ break;
+ case CHIP_POLARIS10:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris10_a11,
+ ARRAY_SIZE(golden_settings_polaris10_a11));
+ amdgpu_device_program_register_sequence(adev,
+ polaris10_golden_common_all,
+ ARRAY_SIZE(polaris10_golden_common_all));
+ data = RREG32_SMC(ixCG_ACLK_CNTL);
+ data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
+ data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
+ WREG32_SMC(ixCG_ACLK_CNTL, data);
+ if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
+ ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
+ (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
+ (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
+ amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
+ amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
+ }
+ break;
+ case CHIP_CARRIZO:
+ amdgpu_device_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+ ARRAY_SIZE(cz_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ cz_golden_settings_a11,
+ ARRAY_SIZE(cz_golden_settings_a11));
+ amdgpu_device_program_register_sequence(adev,
+ cz_golden_common_all,
+ ARRAY_SIZE(cz_golden_common_all));
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_program_register_sequence(adev,
+ stoney_mgcg_cgcg_init,
+ ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ stoney_golden_settings_a11,
+ ARRAY_SIZE(stoney_golden_settings_a11));
+ amdgpu_device_program_register_sequence(adev,
+ stoney_golden_common_all,
+ ARRAY_SIZE(stoney_golden_common_all));
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSCRATCH_REG0);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned int index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ if ((adev->asic_type != CHIP_STONEY) &&
+ (adev->asic_type != CHIP_TOPAZ))
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ unsigned int *tmp = NULL, i;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ chip_name = "topaz";
+ break;
+ case CHIP_TONGA:
+ chip_name = "tonga";
+ break;
+ case CHIP_CARRIZO:
+ chip_name = "carrizo";
+ break;
+ case CHIP_FIJI:
+ chip_name = "fiji";
+ break;
+ case CHIP_STONEY:
+ chip_name = "stoney";
+ break;
+ case CHIP_POLARIS10:
+ chip_name = "polaris10";
+ break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
+ case CHIP_POLARIS12:
+ chip_name = "polaris12";
+ break;
+ case CHIP_VEGAM:
+ chip_name = "vegam";
+ break;
+ default:
+ BUG();
+ }
+
+ if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err == -ENODEV) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ }
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ }
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err == -ENODEV) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ }
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ }
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+
+ adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ if (err == -ENODEV) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ }
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ }
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ /*
+ * Support for MCBP/Virtualization in combination with chained IBs is
+ * formal released on feature version #46
+ */
+ if (adev->gfx.ce_feature_version >= 46 &&
+ adev->gfx.pfp_feature_version >= 46) {
+ adev->virt.chained_ib_support = true;
+ DRM_INFO("Chained IB support enabled!\n");
+ } else
+ adev->virt.chained_ib_support = false;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+
+ if (!adev->gfx.rlc.register_list_format) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err == -ENODEV) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ }
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ }
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ if ((adev->asic_type != CHIP_STONEY) &&
+ (adev->asic_type != CHIP_TOPAZ)) {
+ if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ if (err == -ENODEV) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ }
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ }
+ if (!err) {
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
+ info->fw = adev->gfx.pfp_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
+ info->fw = adev->gfx.me_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
+ info->fw = adev->gfx.ce_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
+ info->fw = adev->gfx.mec_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ /* we need account JT in */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+
+ if (amdgpu_sriov_vf(adev)) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
+ info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
+ info->fw = adev->gfx.mec_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
+ }
+
+ if (adev->gfx.mec2_fw) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ info->fw = adev->gfx.mec2_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx8: Failed to load firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ }
+ return err;
+}
+
+static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
+ PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_CARRIZO)
+ return 5;
+ else
+ return 4;
+}
+
+static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = vi_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
+ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+ r = amdgpu_gfx_rlc_init_cpt(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
+ return 0;
+}
+
+static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+}
+
+static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
+static const u32 vgpr_init_compute_shader[] =
+{
+ 0x7e000209, 0x7e020208,
+ 0x7e040207, 0x7e060206,
+ 0x7e080205, 0x7e0a0204,
+ 0x7e0c0203, 0x7e0e0202,
+ 0x7e100201, 0x7e120200,
+ 0x7e140209, 0x7e160208,
+ 0x7e180207, 0x7e1a0206,
+ 0x7e1c0205, 0x7e1e0204,
+ 0x7e200203, 0x7e220202,
+ 0x7e240201, 0x7e260200,
+ 0x7e280209, 0x7e2a0208,
+ 0x7e2c0207, 0x7e2e0206,
+ 0x7e300205, 0x7e320204,
+ 0x7e340203, 0x7e360202,
+ 0x7e380201, 0x7e3a0200,
+ 0x7e3c0209, 0x7e3e0208,
+ 0x7e400207, 0x7e420206,
+ 0x7e440205, 0x7e460204,
+ 0x7e480203, 0x7e4a0202,
+ 0x7e4c0201, 0x7e4e0200,
+ 0x7e500209, 0x7e520208,
+ 0x7e540207, 0x7e560206,
+ 0x7e580205, 0x7e5a0204,
+ 0x7e5c0203, 0x7e5e0202,
+ 0x7e600201, 0x7e620200,
+ 0x7e640209, 0x7e660208,
+ 0x7e680207, 0x7e6a0206,
+ 0x7e6c0205, 0x7e6e0204,
+ 0x7e700203, 0x7e720202,
+ 0x7e740201, 0x7e760200,
+ 0x7e780209, 0x7e7a0208,
+ 0x7e7c0207, 0x7e7e0206,
+ 0xbf8a0000, 0xbf810000,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+ 0xbe8a0100, 0xbe8c0102,
+ 0xbe8e0104, 0xbe900106,
+ 0xbe920108, 0xbe940100,
+ 0xbe960102, 0xbe980104,
+ 0xbe9a0106, 0xbe9c0108,
+ 0xbe9e0100, 0xbea00102,
+ 0xbea20104, 0xbea40106,
+ 0xbea60108, 0xbea80100,
+ 0xbeaa0102, 0xbeac0104,
+ 0xbeae0106, 0xbeb00108,
+ 0xbeb20100, 0xbeb40102,
+ 0xbeb60104, 0xbeb80106,
+ 0xbeba0108, 0xbebc0100,
+ 0xbebe0102, 0xbec00104,
+ 0xbec20106, 0xbec40108,
+ 0xbec60100, 0xbec80102,
+ 0xbee60004, 0xbee70005,
+ 0xbeea0006, 0xbeeb0007,
+ 0xbee80008, 0xbee90009,
+ 0xbefc0000, 0xbf8a0000,
+ 0xbf810000, 0x00000000,
+};
+
+static const u32 vgpr_init_regs[] =
+{
+ mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
+ mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
+ mmCOMPUTE_NUM_THREAD_X, 256*4,
+ mmCOMPUTE_NUM_THREAD_Y, 1,
+ mmCOMPUTE_NUM_THREAD_Z, 1,
+ mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
+ mmCOMPUTE_PGM_RSRC2, 20,
+ mmCOMPUTE_USER_DATA_0, 0xedcedc00,
+ mmCOMPUTE_USER_DATA_1, 0xedcedc01,
+ mmCOMPUTE_USER_DATA_2, 0xedcedc02,
+ mmCOMPUTE_USER_DATA_3, 0xedcedc03,
+ mmCOMPUTE_USER_DATA_4, 0xedcedc04,
+ mmCOMPUTE_USER_DATA_5, 0xedcedc05,
+ mmCOMPUTE_USER_DATA_6, 0xedcedc06,
+ mmCOMPUTE_USER_DATA_7, 0xedcedc07,
+ mmCOMPUTE_USER_DATA_8, 0xedcedc08,
+ mmCOMPUTE_USER_DATA_9, 0xedcedc09,
+};
+
+static const u32 sgpr1_init_regs[] =
+{
+ mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
+ mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
+ mmCOMPUTE_NUM_THREAD_X, 256*5,
+ mmCOMPUTE_NUM_THREAD_Y, 1,
+ mmCOMPUTE_NUM_THREAD_Z, 1,
+ mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
+ mmCOMPUTE_PGM_RSRC2, 20,
+ mmCOMPUTE_USER_DATA_0, 0xedcedc00,
+ mmCOMPUTE_USER_DATA_1, 0xedcedc01,
+ mmCOMPUTE_USER_DATA_2, 0xedcedc02,
+ mmCOMPUTE_USER_DATA_3, 0xedcedc03,
+ mmCOMPUTE_USER_DATA_4, 0xedcedc04,
+ mmCOMPUTE_USER_DATA_5, 0xedcedc05,
+ mmCOMPUTE_USER_DATA_6, 0xedcedc06,
+ mmCOMPUTE_USER_DATA_7, 0xedcedc07,
+ mmCOMPUTE_USER_DATA_8, 0xedcedc08,
+ mmCOMPUTE_USER_DATA_9, 0xedcedc09,
+};
+
+static const u32 sgpr2_init_regs[] =
+{
+ mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
+ mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
+ mmCOMPUTE_NUM_THREAD_X, 256*5,
+ mmCOMPUTE_NUM_THREAD_Y, 1,
+ mmCOMPUTE_NUM_THREAD_Z, 1,
+ mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
+ mmCOMPUTE_PGM_RSRC2, 20,
+ mmCOMPUTE_USER_DATA_0, 0xedcedc00,
+ mmCOMPUTE_USER_DATA_1, 0xedcedc01,
+ mmCOMPUTE_USER_DATA_2, 0xedcedc02,
+ mmCOMPUTE_USER_DATA_3, 0xedcedc03,
+ mmCOMPUTE_USER_DATA_4, 0xedcedc04,
+ mmCOMPUTE_USER_DATA_5, 0xedcedc05,
+ mmCOMPUTE_USER_DATA_6, 0xedcedc06,
+ mmCOMPUTE_USER_DATA_7, 0xedcedc07,
+ mmCOMPUTE_USER_DATA_8, 0xedcedc08,
+ mmCOMPUTE_USER_DATA_9, 0xedcedc09,
+};
+
+static const u32 sec_ded_counter_registers[] =
+{
+ mmCPC_EDC_ATC_CNT,
+ mmCPC_EDC_SCRATCH_CNT,
+ mmCPC_EDC_UCODE_CNT,
+ mmCPF_EDC_ATC_CNT,
+ mmCPF_EDC_ROQ_CNT,
+ mmCPF_EDC_TAG_CNT,
+ mmCPG_EDC_ATC_CNT,
+ mmCPG_EDC_DMA_CNT,
+ mmCPG_EDC_TAG_CNT,
+ mmDC_EDC_CSINVOC_CNT,
+ mmDC_EDC_RESTORE_CNT,
+ mmDC_EDC_STATE_CNT,
+ mmGDS_EDC_CNT,
+ mmGDS_EDC_GRBM_CNT,
+ mmGDS_EDC_OA_DED,
+ mmSPI_EDC_CNT,
+ mmSQC_ATC_EDC_GATCL1_CNT,
+ mmSQC_EDC_CNT,
+ mmSQ_EDC_DED_CNT,
+ mmSQ_EDC_INFO,
+ mmSQ_EDC_SEC_CNT,
+ mmTCC_EDC_CNT,
+ mmTCP_ATC_EDC_GATCL1_CNT,
+ mmTCP_EDC_CNT,
+ mmTD_EDC_CNT
+};
+
+static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ int r, i;
+ u32 tmp;
+ unsigned total_size, vgpr_offset, sgpr_offset;
+ u64 gpu_addr;
+
+ /* only supported on CZ */
+ if (adev->asic_type != CHIP_CARRIZO)
+ return 0;
+
+ /* bail if the compute ring is not ready */
+ if (!ring->sched.ready)
+ return 0;
+
+ tmp = RREG32(mmGB_EDC_MODE);
+ WREG32(mmGB_EDC_MODE, 0);
+
+ total_size =
+ (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
+ total_size +=
+ (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
+ total_size +=
+ (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
+ total_size = ALIGN(total_size, 256);
+ vgpr_offset = total_size;
+ total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
+ sgpr_offset = total_size;
+ total_size += sizeof(sgpr_init_compute_shader);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
+ ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
+
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+ ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+ /* init the ib length to 0 */
+ ib.length_dw = 0;
+
+ /* VGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 8; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR1 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 8; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR2 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 8; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r) {
+ DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(f, false);
+ if (r) {
+ DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ goto fail;
+ }
+
+ tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
+ tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
+ WREG32(mmGB_EDC_MODE, tmp);
+
+ tmp = RREG32(mmCC_GC_EDC_CONFIG);
+ tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
+ WREG32(mmCC_GC_EDC_CONFIG, tmp);
+
+
+ /* read back registers to clear the counters */
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+ RREG32(sec_ded_counter_registers[i]);
+
+fail:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+
+ return r;
+}
+
+static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+ u32 mc_arb_ramcfg;
+ u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+ u32 tmp;
+ int ret;
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 2;
+ adev->gfx.config.max_cu_per_sh = 6;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_FIJI:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 16;
+ adev->gfx.config.max_cu_per_sh = 16;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 16;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ ret = amdgpu_atombios_get_gfx_info(adev);
+ if (ret)
+ return ret;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_VEGAM:
+ ret = amdgpu_atombios_get_gfx_info(adev);
+ if (ret)
+ return ret;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_TONGA:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 8;
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 8;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_CARRIZO:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_STONEY:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 1;
+ adev->gfx.config.max_cu_per_sh = 3;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ default:
+ adev->gfx.config.max_shader_engines = 2;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ }
+
+ adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+ mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+ adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
+ adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
+ adev->gfx.config.mem_max_burst_length_bytes = 256;
+ if (adev->flags & AMD_IS_APU) {
+ /* Get memory bank mapping mode. */
+ tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
+ dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
+ dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ /* Validate settings in case only one DIMM installed. */
+ if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
+ dimm00_addr_map = 0;
+ if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
+ dimm01_addr_map = 0;
+ if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
+ dimm10_addr_map = 0;
+ if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
+ dimm11_addr_map = 0;
+
+ /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
+ /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
+ if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
+ adev->gfx.config.mem_row_size_in_kb = 2;
+ else
+ adev->gfx.config.mem_row_size_in_kb = 1;
+ } else {
+ tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
+ adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+ if (adev->gfx.config.mem_row_size_in_kb > 4)
+ adev->gfx.config.mem_row_size_in_kb = 4;
+ }
+
+ adev->gfx.config.shader_engine_tile_size = 32;
+ adev->gfx.config.num_gpus = 1;
+ adev->gfx.config.multi_gpu_tile_size = 64;
+
+ /* fix up row size */
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ default:
+ gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
+ break;
+ case 2:
+ gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
+ break;
+ case 4:
+ gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
+ break;
+ }
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ return 0;
+}
+
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX8_MEC_HPD_SIZE);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+
+ return 0;
+}
+
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
+
+static int gfx_v8_0_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id;
+ struct amdgpu_ring *ring;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_CARRIZO:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ adev->gfx.mec.num_mec = 2;
+ break;
+ case CHIP_TOPAZ:
+ case CHIP_STONEY:
+ default:
+ adev->gfx.mec.num_mec = 1;
+ break;
+ }
+
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* Add CP EDC/ECC irq */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ /* SQ interrupts. */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
+ &adev->gfx.sq_irq);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
+ return r;
+ }
+
+ INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = gfx_v8_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load gfx firmware!\n");
+ return r;
+ }
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v8_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "gfx");
+ /* no gfx doorbells on iceland */
+ if (adev->asic_type != CHIP_TOPAZ) {
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0;
+ }
+
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v8_0_compute_ring_init(adev,
+ ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as well as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
+ if (r)
+ return r;
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+ r = gfx_v8_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int gfx_v8_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ gfx_v8_0_mec_fini(adev);
+ amdgpu_gfx_rlc_fini(adev);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ }
+ gfx_v8_0_free_microcode(adev);
+
+ return 0;
+}
+
+static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ uint32_t *modearray, *mod2array;
+ const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+ u32 reg_offset;
+
+ modearray = adev->gfx.config.tile_mode_array;
+ mod2array = adev->gfx.config.macrotile_mode_array;
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ modearray[reg_offset] = 0;
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ mod2array[reg_offset] = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P2));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
+ reg_offset != 23)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ case CHIP_FIJI:
+ case CHIP_VEGAM:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ case CHIP_TONGA:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ case CHIP_POLARIS10:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ case CHIP_STONEY:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P2));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
+ reg_offset != 23)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
+ adev->asic_type);
+ fallthrough;
+
+ case CHIP_CARRIZO:
+ modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P2));
+ modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
+ reg_offset != 23)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
+
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
+
+ break;
+ }
+}
+
+static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
+ u32 se_num, u32 sh_num, u32 instance,
+ int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+}
+
+static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ vi_srbm_select(adev, me, pipe, q, vm);
+}
+
+static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_RB_BACKEND_DISABLE) |
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+
+ data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void
+gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_VEGAM:
+ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
+ RB_XSEL2(1) | PKR_MAP(2) |
+ PKR_XSEL(1) | PKR_YSEL(1) |
+ SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
+ SE_PAIR_YSEL(2);
+ break;
+ case CHIP_TOPAZ:
+ case CHIP_CARRIZO:
+ *rconf |= RB_MAP_PKR0(2);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
+ SE_XSEL(1) | SE_YSEL(1);
+ *rconf1 |= 0x0;
+ break;
+ case CHIP_STONEY:
+ *rconf |= 0x0;
+ *rconf1 |= 0x0;
+ break;
+ default:
+ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
+ break;
+ }
+}
+
+static void
+gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
+ u32 raster_config, u32 raster_config_1,
+ unsigned rb_mask, unsigned num_rb)
+{
+ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
+ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
+ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
+ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
+ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
+
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= ~SE_PAIR_MAP_MASK;
+
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
+ } else {
+ raster_config_1 |=
+ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned raster_config_se = raster_config;
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= ~SE_MAP_MASK;
+
+ if (!se_mask[idx]) {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se &= ~PKR_MAP_MASK;
+
+ if (!pkr0_mask) {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR0_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se &= ~RB_MAP_PKR1_MASK;
+
+ if (!rb0_mask) {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se |=
+ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on VI */
+ gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on VI */
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+}
+
+static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 raster_config = 0, raster_config_1 = 0;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ unsigned num_rb_pipes;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v8_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+
+ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines, 16);
+
+ gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
+
+ if (!adev->gfx.config.backend_enable_mask ||
+ adev->gfx.config.num_rbs >= num_rb_pipes) {
+ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
+ adev->gfx.config.backend_enable_mask,
+ num_rb_pipes);
+ }
+
+ /* cache the values for userspace */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ adev->gfx.config.rb_config[i][j].rb_backend_disable =
+ RREG32(mmCC_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].raster_config =
+ RREG32(mmPA_SC_RASTER_CONFIG);
+ adev->gfx.config.rb_config[i][j].raster_config_1 =
+ RREG32(mmPA_SC_RASTER_CONFIG_1);
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+/**
+ * gfx_v8_0_init_compute_vmid - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize compute vmid sh_mem registers
+ *
+ */
+static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
+ SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+ MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+ SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ vi_srbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, 1);
+ WREG32(mmSH_MEM_APE1_LIMIT, 0);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+ }
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ access. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
+}
+
+static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
+}
+
+static void gfx_v8_0_config_init(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ default:
+ adev->gfx.config.double_offchip_lds_buf = 1;
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ adev->gfx.config.double_offchip_lds_buf = 0;
+ break;
+ }
+}
+
+static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp, sh_static_mem_cfg;
+ int i;
+
+ WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
+ WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
+
+ gfx_v8_0_tiling_mode_table_init(adev);
+ gfx_v8_0_setup_rb(adev);
+ gfx_v8_0_get_cu_info(adev);
+ gfx_v8_0_config_init(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
+ SWIZZLE_ENABLE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ ELEMENT_SIZE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ INDEX_STRIDE, 3);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
+ vi_srbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ WREG32(mmSH_MEM_CONFIG, tmp);
+ WREG32(mmSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ WREG32(mmSH_MEM_CONFIG, tmp);
+ tmp = adev->gmc.shared_aperture_start >> 48;
+ WREG32(mmSH_MEM_BASES, tmp);
+ }
+
+ WREG32(mmSH_MEM_APE1_BASE, 1);
+ WREG32(mmSH_MEM_APE1_LIMIT, 0);
+ }
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v8_0_init_compute_vmid(adev);
+ gfx_v8_0_init_gds_vmid(adev);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /*
+ * making sure that the following register writes will be broadcasted
+ * to all the shaders
+ */
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ WREG32(mmPA_SC_FIFO_SIZE,
+ (adev->gfx.config.sc_prim_fifo_size_frontend <<
+ PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_prim_fifo_size_backend <<
+ PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_hiz_tile_fifo_size <<
+ PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
+ (adev->gfx.config.sc_earlyz_tile_fifo_size <<
+ PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
+
+ tmp = RREG32(mmSPI_ARB_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
+ tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
+ WREG32(mmSPI_ARB_PRIORITY, tmp);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+}
+
+static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ if (k == adev->usec_timeout) {
+ gfx_v8_0_select_se_sh(adev, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ i, j);
+ return;
+ }
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
+
+ WREG32(mmCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+ /* csib */
+ WREG32(mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32(mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32(mmRLC_CSIB_LENGTH,
+ adev->gfx.rlc.clear_state_size);
+}
+
+static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
+ int ind_offset,
+ int list_size,
+ int *unique_indices,
+ int *indices_count,
+ int max_indices,
+ int *ind_start_offsets,
+ int *offset_count,
+ int max_offset)
+{
+ int indices;
+ bool new_entry = true;
+
+ for (; ind_offset < list_size; ind_offset++) {
+
+ if (new_entry) {
+ new_entry = false;
+ ind_start_offsets[*offset_count] = ind_offset;
+ *offset_count = *offset_count + 1;
+ BUG_ON(*offset_count >= max_offset);
+ }
+
+ if (register_list_format[ind_offset] == 0xFFFFFFFF) {
+ new_entry = true;
+ continue;
+ }
+
+ ind_offset += 2;
+
+ /* look for the matching indice */
+ for (indices = 0;
+ indices < *indices_count;
+ indices++) {
+ if (unique_indices[indices] ==
+ register_list_format[ind_offset])
+ break;
+ }
+
+ if (indices >= *indices_count) {
+ unique_indices[*indices_count] =
+ register_list_format[ind_offset];
+ indices = *indices_count;
+ *indices_count = *indices_count + 1;
+ BUG_ON(*indices_count >= max_indices);
+ }
+
+ register_list_format[ind_offset] = indices;
+ }
+}
+
+static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
+{
+ int i, temp, data;
+ int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
+ int indices_count = 0;
+ int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ int offset_count = 0;
+
+ int list_size;
+ unsigned int *register_list_format =
+ kmemdup(adev->gfx.rlc.register_list_format,
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+ if (!register_list_format)
+ return -ENOMEM;
+
+ gfx_v8_0_parse_ind_reg_list(register_list_format,
+ RLC_FormatDirectRegListLength,
+ adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+ unique_indices,
+ &indices_count,
+ ARRAY_SIZE(unique_indices),
+ indirect_start_offsets,
+ &offset_count,
+ ARRAY_SIZE(indirect_start_offsets));
+
+ /* save and restore list */
+ WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
+
+ WREG32(mmRLC_SRM_ARAM_ADDR, 0);
+ for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+ WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
+
+ /* indirect list */
+ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
+ for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+ WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
+
+ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
+ list_size = list_size >> 1;
+ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
+ WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
+
+ /* starting offsets starts */
+ WREG32(mmRLC_GPM_SCRATCH_ADDR,
+ adev->gfx.rlc.starting_offsets_start);
+ for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
+ WREG32(mmRLC_GPM_SCRATCH_DATA,
+ indirect_start_offsets[i]);
+
+ /* unique indices */
+ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
+ data = mmRLC_SRM_INDEX_CNTL_DATA_0;
+ for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
+ if (unique_indices[i] != 0) {
+ WREG32(temp + i, unique_indices[i] & 0x3FFFF);
+ WREG32(data + i, unique_indices[i] >> 20);
+ }
+ }
+ kfree(register_list_format);
+
+ return 0;
+}
+
+static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
+{
+ WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
+
+ data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
+ data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
+ WREG32(mmRLC_PG_DELAY, data);
+
+ WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
+ WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
+
+}
+
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
+}
+
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
+}
+
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
+}
+
+static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
+{
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
+ gfx_v8_0_init_csb(adev);
+ gfx_v8_0_init_save_restore_list(adev);
+ gfx_v8_0_enable_save_restore_machine(adev);
+ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v8_0_init_power_gating(adev);
+ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+ } else if ((adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
+ gfx_v8_0_init_csb(adev);
+ gfx_v8_0_init_save_restore_list(adev);
+ gfx_v8_0_enable_save_restore_machine(adev);
+ gfx_v8_0_init_power_gating(adev);
+ }
+
+}
+
+static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
+{
+ WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
+
+ gfx_v8_0_enable_gui_idle_interrupt(adev, false);
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+}
+
+static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+
+ WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
+{
+ WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v8_0_enable_gui_idle_interrupt(adev, true);
+
+ udelay(50);
+}
+
+static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v8_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+ adev->gfx.rlc.funcs->reset(adev);
+ gfx_v8_0_init_pg(adev);
+ adev->gfx.rlc.funcs->start(adev);
+
+ return 0;
+}
+
+static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp = RREG32(mmCP_ME_CNTL);
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
+ }
+ WREG32(mmCP_ME_CNTL, tmp);
+ udelay(50);
+}
+
+static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = vi_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+ /* pa_sc_raster_config/pa_sc_raster_config1 */
+ count += 4;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+
+ /* init the CP */
+ WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
+ WREG32(mmCP_ENDIAN_SWAP, 0);
+ WREG32(mmCP_DEVICE_ID, 1);
+
+ gfx_v8_0_cp_gfx_enable(adev, true);
+
+ r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ /* clear state buffer */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = vi_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring,
+ ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
+ amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
+ amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ /* init the CE partitions */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ u32 tmp;
+ /* no gfx doorbells on iceland */
+ if (adev->asic_type == CHIP_TOPAZ)
+ return;
+
+ tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
+ }
+
+ WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER,
+ adev->doorbell_index.gfx_ring0);
+ WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32(mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32(mmCP_RB_VMID, 0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
+ ring->wptr = 0;
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+ WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
+ mdelay(1);
+ WREG32(mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32(mmCP_RB0_BASE, rb_addr);
+ WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ gfx_v8_0_set_cpg_door_bell(adev, ring);
+ /* start the ring */
+ amdgpu_ring_clear_ring(ring);
+ gfx_v8_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable) {
+ WREG32(mmCP_MEC_CNTL, 0);
+ } else {
+ WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+/* KIQ functions */
+static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32(mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32(mmRLC_CP_SCHEDULERS, tmp);
+}
+
+static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ uint64_t queue_mask = 0;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
+ continue;
+
+ /* This situation may be hit in the future if a new HW
+ * generation exposes more than 64 queues. If so, the
+ * definition of queue_mask needs updating */
+ if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
+ DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ break;
+ }
+
+ queue_mask |= (1ull << i);
+ }
+
+ r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+ /* set resources */
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+
+ /* map queues */
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+ }
+
+ amdgpu_ring_commit(kiq_ring);
+
+ return 0;
+}
+
+static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
+{
+ int i, r = 0;
+
+ if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
+ WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ udelay(1);
+ }
+ if (i == adev->usec_timeout)
+ r = -ETIMEDOUT;
+ }
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+ WREG32(mmCP_HQD_PQ_RPTR, 0);
+ WREG32(mmCP_HQD_PQ_WPTR, 0);
+
+ return r;
+}
+
+static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+ }
+}
+
+static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct vi_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+ mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32(mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
+ CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN,
+ ring->use_doorbell ? 1 : 0);
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32(mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32(mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_wptr = ring->wptr;
+ mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MTYPE */
+ tmp = RREG32(mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ tmp = RREG32(mmCP_HQD_IQ_TIMER);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
+ mqd->cp_hqd_iq_timer = tmp;
+
+ tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
+ mqd->cp_hqd_ctx_save_control = tmp;
+
+ /* defaults */
+ mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+ mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+ mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+ mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+ mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+ mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+ mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+ mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+ mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+ mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+ mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+ mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
+ /* set static priority for a queue/ring */
+ gfx_v8_0_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+ struct vi_mqd *mqd)
+{
+ uint32_t mqd_reg;
+ uint32_t *mqd_data;
+
+ /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+ mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+ /* disable wptr polling */
+ WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* program all HQD registers */
+ for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+ * This is safe since EOP RPTR==WPTR for any inactive HQD
+ * on ASICs that do not support context-save.
+ * EOP writes/reads can start anywhere in the ring.
+ */
+ if (adev->asic_type != CHIP_TONGA) {
+ WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+ }
+
+ for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ /* activate the HQD */
+ for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+ WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+ return 0;
+}
+
+static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct vi_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v8_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_mqd_commit(adev, mqd);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+ ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_mqd_init(ring);
+ gfx_v8_0_mqd_commit(adev, mqd);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct vi_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+ ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_mqd_init(ring);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+ return 0;
+}
+
+static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
+{
+ if (adev->asic_type > CHIP_TONGA) {
+ WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
+ WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
+ }
+ /* enable doorbells */
+ WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+}
+
+static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v8_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v8_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v8_0_kcq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ gfx_v8_0_set_mec_doorbell_range(adev);
+
+ r = gfx_v8_0_kiq_kcq_enable(adev);
+ if (r)
+ goto done;
+
+done:
+ return r;
+}
+
+static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ /* collect all the ring_tests here, gfx, kiq, compute */
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ ring = &adev->gfx.kiq[0].ring;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ amdgpu_ring_test_helper(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v8_0_enable_gui_idle_interrupt(adev, false);
+
+ r = gfx_v8_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v8_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v8_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v8_0_cp_test_all_rings(adev);
+ if (r)
+ return r;
+
+ gfx_v8_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v8_0_cp_gfx_enable(adev, enable);
+ gfx_v8_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v8_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfx_v8_0_init_golden_registers(adev);
+ gfx_v8_0_constants_init(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v8_0_cp_resume(adev);
+
+ return r;
+}
+
+static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+
+ r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
+ if (r)
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+ r = amdgpu_ring_test_helper(kiq_ring);
+ if (r)
+ DRM_ERROR("KCQ disable failed\n");
+
+ return r;
+}
+
+static bool gfx_v8_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
+ || RREG32(mmGRBM_STATUS2) != 0x8)
+ return false;
+ else
+ return true;
+}
+
+static bool gfx_v8_0_rlc_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (RREG32(mmGRBM_STATUS2) != 0x8)
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v8_0_wait_for_rlc_idle(void *handle)
+{
+ unsigned int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v8_0_rlc_is_idle(handle))
+ return 0;
+
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v8_0_wait_for_idle(void *handle)
+{
+ unsigned int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v8_0_is_idle(handle))
+ return 0;
+
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v8_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+
+ amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
+
+ /* disable KCQ to avoid CPC touch memory not valid anymore */
+ gfx_v8_0_kcq_disable(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ if (!gfx_v8_0_wait_for_idle(adev))
+ gfx_v8_0_cp_enable(adev, false);
+ else
+ pr_err("cp is busy, skip halt cp\n");
+ if (!gfx_v8_0_wait_for_rlc_idle(adev))
+ adev->gfx.rlc.funcs->stop(adev);
+ else
+ pr_err("rlc is busy, skip halt rlc\n");
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static int gfx_v8_0_suspend(void *handle)
+{
+ return gfx_v8_0_hw_fini(handle);
+}
+
+static int gfx_v8_0_resume(void *handle)
+{
+ return gfx_v8_0_hw_init(handle);
+}
+
+static bool gfx_v8_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+ u32 tmp;
+
+ /* GRBM_STATUS */
+ tmp = RREG32(mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
+ GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32(mmGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
+ REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
+ REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_GRBM, 1);
+ }
+
+ /* SRBM_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ adev->gfx.grbm_soft_reset = grbm_soft_reset;
+ adev->gfx.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->gfx.grbm_soft_reset = 0;
+ adev->gfx.srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int gfx_v8_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0;
+
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
+
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
+ /* Disable GFX parsing/prefetching */
+ gfx_v8_0_cp_gfx_enable(adev, false);
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_deactivate_hqd(adev, 2);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Disable MEC parsing/prefetching */
+ gfx_v8_0_cp_compute_enable(adev, false);
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+ u32 tmp;
+
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
+
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+ srbm_soft_reset = adev->gfx.srbm_soft_reset;
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
+ WREG32(mmGMCON_DEBUG, tmp);
+ udelay(50);
+ }
+
+ if (grbm_soft_reset) {
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32(mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmGRBM_SOFT_RESET);
+ }
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ }
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
+ tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
+ WREG32(mmGMCON_DEBUG, tmp);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
+ return 0;
+}
+
+static int gfx_v8_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset = 0;
+
+ if ((!adev->gfx.grbm_soft_reset) &&
+ (!adev->gfx.srbm_soft_reset))
+ return 0;
+
+ grbm_soft_reset = adev->gfx.grbm_soft_reset;
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
+ int i;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ mutex_lock(&adev->srbm_mutex);
+ vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v8_0_deactivate_hqd(adev, 2);
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ gfx_v8_0_kiq_resume(adev);
+ gfx_v8_0_kcq_resume(adev);
+ }
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
+ gfx_v8_0_cp_gfx_resume(adev);
+
+ gfx_v8_0_cp_test_all_rings(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+
+ return 0;
+}
+
+/**
+ * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetches a GPU clock counter snapshot.
+ * Returns the 64 bit clock counter snapshot.
+ */
+static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ return clock;
+}
+
+static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ /* GDS Base */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gds_base);
+
+ /* GDS Size */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gds_size);
+
+ /* GWS */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32(mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32(mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32(mmSQ_IND_DATA);
+}
+
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 0 wave data */
+ dst[(*no_fields)++] = 0;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+
+static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v8_0_select_se_sh,
+ .read_wave_data = &gfx_v8_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
+ .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
+};
+
+static int gfx_v8_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.xcc_mask = 1;
+ adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
+ gfx_v8_0_set_ring_funcs(adev);
+ gfx_v8_0_set_irq_funcs(adev);
+ gfx_v8_0_set_gds_init(adev);
+ gfx_v8_0_set_rlc_funcs(adev);
+
+ return 0;
+}
+
+static int gfx_v8_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ /* requires IBs so do in late init after IB pool is initialized */
+ r = gfx_v8_0_do_edc_gpr_workarounds(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
+ return r;
+ }
+
+ r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
+ if (r) {
+ DRM_ERROR(
+ "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
+ r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if ((adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM))
+ /* Send msg to SMU via Powerplay */
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+
+ WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
+}
+
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
+}
+
+static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
+}
+
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
+}
+
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
+
+ /* Read any GFX register to wake up GFX. */
+ if (!enable)
+ RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+ cz_enable_gfx_cg_power_gating(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ cz_enable_gfx_pipeline_power_gating(adev, true);
+ } else {
+ cz_enable_gfx_cg_power_gating(adev, false);
+ cz_enable_gfx_pipeline_power_gating(adev, false);
+ }
+}
+
+static int gfx_v8_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+ cz_enable_sck_slow_down_on_power_up(adev, true);
+ cz_enable_sck_slow_down_on_power_down(adev, true);
+ } else {
+ cz_enable_sck_slow_down_on_power_up(adev, false);
+ cz_enable_sck_slow_down_on_power_down(adev, false);
+ }
+ if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+ cz_enable_cp_power_gating(adev, true);
+ else
+ cz_enable_cp_power_gating(adev, false);
+
+ cz_update_gfx_cg_power_gating(adev, enable);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+ polaris11_enable_gfx_quick_mg_power_gating(adev, true);
+ else
+ polaris11_enable_gfx_quick_mg_power_gating(adev, false);
+ break;
+ default:
+ break;
+ }
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return 0;
+}
+
+static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLG */
+ data = RREG32(mmRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CGTS */
+ data = RREG32(mmCGTS_SM_CTRL_REG);
+ if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_CGTS;
+
+ /* AMD_CG_SUPPORT_GFX_CGTS_LS */
+ if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32(mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32(mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+}
+
+static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
+ uint32_t reg_addr, uint32_t cmd)
+{
+ uint32_t data;
+
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+ WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+
+ data = RREG32(mmRLC_SERDES_WR_CTRL);
+ if (adev->asic_type == CHIP_STONEY)
+ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
+ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
+ RLC_SERDES_WR_CTRL__POWER_UP_MASK |
+ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
+ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
+ else
+ data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
+ RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
+ RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
+ RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
+ RLC_SERDES_WR_CTRL__POWER_UP_MASK |
+ RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
+ RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
+ RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
+ RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
+ data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
+ (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
+ (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
+ (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
+
+ WREG32(mmRLC_SERDES_WR_CTRL, data);
+}
+
+#define MSG_ENTER_RLC_SAFE_MODE 1
+#define MSG_EXIT_RLC_SAFE_MODE 0
+#define RLC_GPR_REG2__REQ_MASK 0x00000001
+#define RLC_GPR_REG2__REQ__SHIFT 0
+#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
+#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
+
+static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting;
+
+ rlc_setting = RREG32(mmRLC_CNTL);
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return false;
+
+ return true;
+}
+
+static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+ data = RREG32(mmRLC_CNTL);
+ data |= RLC_SAFE_MODE__CMD_MASK;
+ data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32(mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if ((RREG32(mmRLC_GPM_STAT) &
+ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
+ RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
+ (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
+ RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
+ break;
+ udelay(1);
+ }
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RREG32(mmRLC_CNTL);
+ data |= RLC_SAFE_MODE__CMD_MASK;
+ data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
+ WREG32(mmRLC_SAFE_MODE, data);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
+ else
+ data = RREG32(mmRLC_SPM_VMID);
+
+ data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
+ else
+ WREG32(mmRLC_SPM_VMID, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
+ .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v8_0_set_safe_mode,
+ .unset_safe_mode = gfx_v8_0_unset_safe_mode,
+ .init = gfx_v8_0_rlc_init,
+ .get_csb_size = gfx_v8_0_get_csb_size,
+ .get_csb_buffer = gfx_v8_0_get_csb_buffer,
+ .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
+ .resume = gfx_v8_0_rlc_resume,
+ .stop = gfx_v8_0_rlc_stop,
+ .reset = gfx_v8_0_rlc_reset,
+ .start = gfx_v8_0_rlc_start,
+ .update_spm_vmid = gfx_v8_0_update_spm_vmid
+};
+
+static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
+ /* 1 - RLC memory Light sleep */
+ WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
+ WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
+ }
+
+ /* 3 - RLC_CGTT_MGCG_OVERRIDE */
+ temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ if (adev->flags & AMD_IS_APU)
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
+ else
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
+
+ if (temp != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 5 - clear mgcg override */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
+ /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
+ temp = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
+ data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
+ data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
+ data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
+ if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
+ data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
+ data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
+ data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
+ if (temp != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+ }
+ udelay(50);
+
+ /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+ } else {
+ /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
+ temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
+ if (temp != data)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32(mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32(mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32(mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32(mmCP_MEM_SLP_CNTL, data);
+ }
+
+ /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
+ temp = data = RREG32(mmCGTS_SM_CTRL_REG);
+ data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
+ CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
+ if (temp != data)
+ WREG32(mmCGTS_SM_CTRL_REG, data);
+
+ /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 6 - set mgcg override */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
+
+ udelay(50);
+
+ /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, temp1, data, data1;
+
+ temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
+ if (temp1 != data1)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
+
+ /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 2 - clear cgcg override */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
+
+ /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* 3 - write cmd to set CGLS */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
+
+ /* 4 - enable cgcg */
+ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ /* enable cgls*/
+ data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
+
+ if (temp1 != data1)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
+ } else {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (temp != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
+ * Cmp_busy/GFX_Idle interrupts
+ */
+ gfx_v8_0_enable_gui_idle_interrupt(adev, true);
+ } else {
+ /* disable cntx_empty_int_enable & GFX Idle interrupt */
+ gfx_v8_0_enable_gui_idle_interrupt(adev, false);
+
+ /* TEST CGCG */
+ temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
+ data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
+ if (temp1 != data1)
+ WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
+
+ /* read gfx register to wake up cgcg */
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+ RREG32(mmCB_CGTT_SCLK_CTRL);
+
+ /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* write cmd to Set CGCG Override */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
+
+ /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ /* write cmd to Clear CGLS */
+ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
+
+ /* disable cgcg, cgls should be disabled too. */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ if (temp != data)
+ WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+ /* enable interrupts again for PG */
+ gfx_v8_0_enable_gui_idle_interrupt(adev, true);
+ }
+
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (enable) {
+ /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
+ * === MGCG + MGLS + TS(CG/LS) ===
+ */
+ gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
+ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
+ * === CGCG + CGLS ===
+ */
+ gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
+ gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
+ }
+ return 0;
+}
+
+static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ uint32_t msg_id, pp_state = 0;
+ uint32_t pp_support_state = 0;
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CG,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_MG,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+
+ uint32_t msg_id, pp_state = 0;
+ uint32_t pp_support_state = 0;
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CG,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_3D,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_MG,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_RLC,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+ msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
+ PP_BLOCK_GFX_CP,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ return 0;
+}
+
+static int gfx_v8_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ gfx_v8_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ case CHIP_TONGA:
+ gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ /* XXX check if swapping is necessary on BE */
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32(mmCP_RB0_WPTR);
+}
+
+static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ (void)RREG32(mmCP_RB0_WPTR);
+ }
+}
+
+static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 ref_and_mask, reg_mem_engine;
+
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
+ reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
+ WAIT_REG_MEM_FUNCTION(3) | /* == */
+ reg_mem_engine));
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
+ amdgpu_ring_write(ring, ref_and_mask);
+ amdgpu_ring_write(ring, ref_and_mask);
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+}
+
+static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
+ EVENT_INDEX(4));
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+ EVENT_INDEX(0));
+}
+
+static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
+ gfx_v8_0_ring_emit_de_meta(ring);
+ }
+
+ amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ (ib->gpu_addr & 0xFFFFFFFC));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ (ib->gpu_addr & 0xFFFFFFFC));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* Workaround for cache flush problems. First send a dummy EOP
+ * event down the pipe with seq one below.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(1) | INT_SEL(0));
+ amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+ amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+ /* Then send the real EOP event down the pipe:
+ * EVENT_WRITE_EOP - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+
+}
+
+static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
+}
+
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for the invalidate to complete */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+ WAIT_REG_MEM_FUNCTION(0) | /* always */
+ WAIT_REG_MEM_ENGINE(0))); /* me */
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* ref */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+
+ /* compute doesn't have PFP */
+ if (usepfp) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ return *ring->wptr_cpu_addr;
+}
+
+static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
+ u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+}
+
+static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, mmCPC_INT_STATUS);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ if (amdgpu_sriov_vf(ring->adev))
+ gfx_v8_0_ring_emit_ce_meta(ring);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ gfx_v8_0_ring_emit_vgt_flush(ring);
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ return ret;
+}
+
+static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr & ring->buf_mask) - 1;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
+}
+
+static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = 1 << 16; /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
+}
+
+static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+}
+
+static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
+ break;
+ case 1:
+ mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+ break;
+ case 2:
+ mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+ break;
+ case 3:
+ mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+ return 0;
+}
+
+static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+ return 0;
+}
+
+static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 0;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 1;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+
+ return 0;
+}
+
+static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 1;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
+ enable_flag);
+
+ return 0;
+}
+
+static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v8_0_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v8_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v8_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("CP EDC/ECC error detected.");
+ return 0;
+}
+
+static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
+ bool from_wq)
+{
+ u32 enc, se_id, sh_id, cu_id;
+ char type[20];
+ int sq_edc_source = -1;
+
+ enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
+ se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
+
+ switch (enc) {
+ case 0:
+ DRM_INFO("SQ general purpose intr detected:"
+ "se_id %d, immed_overflow %d, host_reg_overflow %d,"
+ "host_cmd_overflow %d, cmd_timestamp %d,"
+ "reg_timestamp %d, thread_trace_buff_full %d,"
+ "wlt %d, thread_trace %d.\n",
+ se_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
+ );
+ break;
+ case 1:
+ case 2:
+
+ cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
+ sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
+
+ /*
+ * This function can be called either directly from ISR
+ * or from BH in which case we can access SQ_EDC_INFO
+ * instance
+ */
+ if (from_wq) {
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
+
+ sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
+
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (enc == 1)
+ sprintf(type, "instruction intr");
+ else
+ sprintf(type, "EDC/ECC error");
+
+ DRM_INFO(
+ "SQ %s detected: "
+ "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
+ "trap %s, sq_ed_info.source %s.\n",
+ type, se_id, sh_id, cu_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
+ (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
+ );
+ break;
+ default:
+ DRM_ERROR("SQ invalid encoding type\n.");
+ }
+}
+
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
+{
+
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
+ struct sq_work *sq_work = container_of(work, struct sq_work, work);
+
+ gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
+}
+
+static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned ih_data = entry->src_data[0];
+
+ /*
+ * Try to submit work so SQ_EDC_INFO can be accessed from
+ * BH. If previous work submission hasn't finished yet
+ * just print whatever info is possible directly from the ISR.
+ */
+ if (work_pending(&adev->gfx.sq_work.work)) {
+ gfx_v8_0_parse_sq_irq(adev, ih_data, false);
+ } else {
+ adev->gfx.sq_work.ih_data = ih_data;
+ schedule_work(&adev->gfx.sq_work.work);
+ }
+
+ return 0;
+}
+
+static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA |
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA |
+ PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+
+/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT 0x0000007f
+static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
+ uint32_t pipe, bool enable)
+{
+ uint32_t val;
+ uint32_t wcl_cs_reg;
+
+ val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
+
+ switch (pipe) {
+ case 0:
+ wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
+ break;
+ case 1:
+ wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
+ break;
+ case 2:
+ wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
+ break;
+ case 3:
+ wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+
+ amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+
+}
+
+#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff
+static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ int i;
+
+ /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+ amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
+
+ /* Restrict waves for normal/low priority compute queues as well
+ * to get best QoS for high priority compute jobs.
+ *
+ * amdgpu controls only 1st ME(0-3 CS pipes).
+ */
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ if (i != ring->pipe)
+ gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
+
+ }
+
+}
+
+static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
+ .name = "gfx_v8_0",
+ .early_init = gfx_v8_0_early_init,
+ .late_init = gfx_v8_0_late_init,
+ .sw_init = gfx_v8_0_sw_init,
+ .sw_fini = gfx_v8_0_sw_fini,
+ .hw_init = gfx_v8_0_hw_init,
+ .hw_fini = gfx_v8_0_hw_fini,
+ .suspend = gfx_v8_0_suspend,
+ .resume = gfx_v8_0_resume,
+ .is_idle = gfx_v8_0_is_idle,
+ .wait_for_idle = gfx_v8_0_wait_for_idle,
+ .check_soft_reset = gfx_v8_0_check_soft_reset,
+ .pre_soft_reset = gfx_v8_0_pre_soft_reset,
+ .soft_reset = gfx_v8_0_soft_reset,
+ .post_soft_reset = gfx_v8_0_post_soft_reset,
+ .set_clockgating_state = gfx_v8_0_set_clockgating_state,
+ .set_powergating_state = gfx_v8_0_set_powergating_state,
+ .get_clockgating_state = gfx_v8_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v8_0_ring_get_rptr,
+ .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* maximum 215dw if count 16 IBs in */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
+ 12 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ the first COND_EXEC jump to the place just
+ prior to this double SWITCH_BUFFER */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 12 + 12 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 5, /* SURFACE_SYNC */
+ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v8_0_ring_test_ring,
+ .test_ib = gfx_v8_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v8_ring_emit_sb,
+ .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
+ .emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v8_0_ring_get_rptr,
+ .get_wptr = gfx_v8_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v8_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v8_0_ring_emit_gds_switch */
+ 7 + /* gfx_v8_0_ring_emit_hdp_flush */
+ 5 + /* hdp_invalidate */
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
+ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7 + /* gfx_v8_0_emit_mem_sync_compute */
+ 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
+ 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v8_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v8_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v8_0_ring_test_ring,
+ .test_ib = gfx_v8_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
+ .emit_wave_limit = gfx_v8_0_emit_wave_limit,
+};
+
+static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
+ .get_rptr = gfx_v8_0_ring_get_rptr,
+ .get_wptr = gfx_v8_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v8_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v8_0_ring_emit_gds_switch */
+ 7 + /* gfx_v8_0_ring_emit_hdp_flush */
+ 5 + /* hdp_invalidate */
+ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+ 17 + /* gfx_v8_0_ring_emit_vm_flush */
+ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
+ .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v8_0_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v8_0_ring_emit_rreg,
+ .emit_wreg = gfx_v8_0_ring_emit_wreg,
+};
+
+static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
+ .set = gfx_v8_0_set_eop_interrupt_state,
+ .process = gfx_v8_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
+ .set = gfx_v8_0_set_priv_reg_fault_state,
+ .process = gfx_v8_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
+ .set = gfx_v8_0_set_priv_inst_fault_state,
+ .process = gfx_v8_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
+ .set = gfx_v8_0_set_cp_ecc_int_state,
+ .process = gfx_v8_0_cp_ecc_error_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
+ .set = gfx_v8_0_set_sq_int_state,
+ .process = gfx_v8_0_sq_irq,
+};
+
+static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
+
+ adev->gfx.cp_ecc_error_irq.num_types = 1;
+ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
+
+ adev->gfx.sq_irq.num_types = 1;
+ adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
+}
+
+static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &iceland_rlc_funcs;
+}
+
+static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+ adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
+}
+
+static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
+ RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
+}
+
+static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ unsigned disable_masks[4 * 2];
+ u32 ao_cu_num;
+
+ memset(cu_info, 0, sizeof(*cu_info));
+
+ if (adev->flags & AMD_IS_APU)
+ ao_cu_num = 2;
+ else
+ ao_cu_num = adev->gfx.config.max_cu_per_sh;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 4 && j < 2)
+ gfx_v8_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
+ cu_info->bitmap[0][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ if (bitmap & mask) {
+ if (counter < ao_cu_num)
+ ao_bitmap |= mask;
+ counter ++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+ cu_info->max_waves_per_simd = 10;
+ cu_info->max_scratch_slots_per_cu = 32;
+ cu_info->wave_front_size = 64;
+ cu_info->lds_size = 64;
+}
+
+const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 8,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 8,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &gfx_v8_0_ip_funcs,
+};
+
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+{
+ uint64_t ce_payload_addr;
+ int cnt_ce;
+ union {
+ struct vi_ce_ib_state regular;
+ struct vi_ce_ib_state_chained_ib chained;
+ } ce_payload = {};
+
+ if (ring->adev->virt.chained_ib_support) {
+ ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+ offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
+ cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
+ } else {
+ ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+ offsetof(struct vi_gfx_meta_data, ce_payload);
+ cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
+}
+
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+{
+ uint64_t de_payload_addr, gds_addr, csa_addr;
+ int cnt_de;
+ union {
+ struct vi_de_ib_state regular;
+ struct vi_de_ib_state_chained_ib chained;
+ } de_payload = {};
+
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
+ gds_addr = csa_addr + 4096;
+ if (ring->adev->virt.chained_ib_support) {
+ de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
+ de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
+ cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
+ } else {
+ de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
+ de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
+ cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
new file mode 100644
index 000000000..ec3f11fa9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V8_0_H__
+#define __GFX_V8_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
+
+struct amdgpu_device;
+struct vi_mqd;
+
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
new file mode 100644
index 000000000..2e23d08b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -0,0 +1,7269 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_pm.h"
+
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "clearstate_gfx9.h"
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
+#include "amdgpu_ras.h"
+
+#include "amdgpu_ring_mux.h"
+#include "gfx_v9_4.h"
+#include "gfx_v9_0.h"
+#include "gfx_v9_4_2.h"
+
+#include "asic_reg/pwr/pwr_10_0_offset.h"
+#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
+#include "asic_reg/gc/gc_9_0_default.h"
+
+#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS 2
+#define GFX9_MEC_HPD_SIZE 4096
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
+
+#define mmGCEA_PROBE_MAP 0x070c
+#define mmGCEA_PROBE_MAP_BASE_IDX 0
+
+MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega10_me.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega12_me.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/raven_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven_me.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
+MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
+MODULE_FIRMWARE("amdgpu/picasso_me.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
+
+MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven2_me.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
+MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
+MODULE_FIRMWARE("amdgpu/renoir_me.bin");
+MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
+MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
+
+#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
+#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
+#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
+#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
+#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
+#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
+#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
+#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
+
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
+
+enum ta_ras_gfx_subblock {
+ /*CPC*/
+ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPC_UCODE,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME1,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ TA_RAS_BLOCK__GFX_DC_STATE_ME2,
+ TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF*/
+ TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ TA_RAS_BLOCK__GFX_CPF_TAG,
+ TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG*/
+ TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
+ TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ TA_RAS_BLOCK__GFX_CPG_TAG,
+ TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS*/
+ TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
+ TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI*/
+ TA_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQ_LDS_D,
+ TA_RAS_BLOCK__GFX_SQ_LDS_I,
+ TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
+ TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges)*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2*/
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA*/
+ TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA*/
+ TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4*/
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI*/
+ TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP*/
+ TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
+ TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ TA_RAS_BLOCK__GFX_TCP_DB_RAM,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD*/
+ TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
+ TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges)*/
+ TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0*/
+ TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1*/
+ TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
+ TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2*/
+ TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
+ TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank*/
+ TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker*/
+ TA_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache*/
+ TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ TA_RAS_BLOCK__GFX_MAX
+};
+
+struct ras_gfx_subblock {
+ unsigned char *name;
+ int ta_subblock;
+ int hw_supported_error_type;
+ int sw_supported_error_type;
+};
+
+#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
+ [AMDGPU_RAS_BLOCK__##subblock] = { \
+ #subblock, \
+ TA_RAS_BLOCK__##subblock, \
+ ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
+ (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
+ }
+
+static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
+ 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
+ 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
+ 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
+ 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
+ AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_0[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
+};
+
+static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
+ {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
+ {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+};
+
+#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
+#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
+#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
+#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
+
+static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
+static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
+static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg10,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ break;
+ case IP_VERSION(9, 2, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_2_1,
+ ARRAY_SIZE(golden_settings_gc_9_2_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_2_1_vg12,
+ ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
+ break;
+ case IP_VERSION(9, 4, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg20,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg20));
+ break;
+ case IP_VERSION(9, 4, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_1_arct,
+ ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
+ break;
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ soc15_program_register_sequence(adev, golden_settings_gc_9_1,
+ ARRAY_SIZE(golden_settings_gc_9_1));
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rv2,
+ ARRAY_SIZE(golden_settings_gc_9_1_rv2));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rv1,
+ ARRAY_SIZE(golden_settings_gc_9_1_rv1));
+ break;
+ case IP_VERSION(9, 3, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rn,
+ ARRAY_SIZE(golden_settings_gc_9_1_rn));
+ return; /* for renoir, don't need common goldensetting */
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_4_2_init_golden_registers(adev,
+ adev->smuio.funcs->get_die_id(adev));
+ break;
+ default:
+ break;
+ }
+
+ if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
+ (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
+ soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
+ (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
+}
+
+static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.me_fw_write_wait = false;
+ adev->gfx.mec_fw_write_wait = false;
+
+ if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
+ ((adev->gfx.mec_fw_version < 0x000001a5) ||
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46)))
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 42) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b1) &&
+ (adev->gfx.pfp_feature_version >= 42))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000193) &&
+ (adev->gfx.mec_feature_version >= 42))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case IP_VERSION(9, 2, 1):
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 44) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b2) &&
+ (adev->gfx.pfp_feature_version >= 44))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000196) &&
+ (adev->gfx.mec_feature_version >= 44))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case IP_VERSION(9, 4, 0):
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 44) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b2) &&
+ (adev->gfx.pfp_feature_version >= 44))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000197) &&
+ (adev->gfx.mec_feature_version >= 44))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 42) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b1) &&
+ (adev->gfx.pfp_feature_version >= 42))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000192) &&
+ (adev->gfx.mec_feature_version >= 42))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ default:
+ adev->gfx.me_fw_write_wait = true;
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ }
+}
+
+struct amdgpu_gfxoff_quirk {
+ u16 chip_vendor;
+ u16 chip_device;
+ u16 subsys_vendor;
+ u16 subsys_device;
+ u8 revision;
+};
+
+static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
+ { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+ /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+ /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ { 0, 0, 0, 0, 0 },
+};
+
+static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
+{
+ const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
+
+ while (p && p->chip_device != 0) {
+ if (pdev->vendor == p->chip_vendor &&
+ pdev->device == p->chip_device &&
+ pdev->subsystem_vendor == p->subsys_vendor &&
+ pdev->subsystem_device == p->subsys_device &&
+ pdev->revision == p->revision) {
+ return true;
+ }
+ ++p;
+ }
+ return false;
+}
+
+static bool is_raven_kicker(struct amdgpu_device *adev)
+{
+ if (adev->pm.fw_version >= 0x41e2b)
+ return true;
+ else
+ return false;
+}
+
+static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
+{
+ if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
+ (adev->gfx.me_fw_version >= 0x000000a5) &&
+ (adev->gfx.me_feature_version >= 52))
+ return true;
+ else
+ return false;
+}
+
+static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
+{
+ if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ break;
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
+ (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
+ ((!is_raven_kicker(adev) &&
+ adev->gfx.rlc_fw_version < 531) ||
+ (adev->gfx.rlc_feature_version < 1) ||
+ !adev->gfx.rlc.is_rlc_v2_1))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
+ case IP_VERSION(9, 3, 0):
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
+ char *chip_name)
+{
+ char fw_name[30];
+ int err;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
+
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ }
+ return err;
+}
+
+static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
+ char *chip_name)
+{
+ char fw_name[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+ uint32_t smu_version;
+
+ /*
+ * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
+ * instead of picasso_rlc.bin.
+ * Judgment method:
+ * PCO AM4: revision >= 0xC8 && revision <= 0xCF
+ * or revision >= 0xD8 && revision <= 0xDF
+ * otherwise is PCO FP5
+ */
+ if (!strcmp(chip_name, "picasso") &&
+ (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
+ ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+ else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
+ (smu_version >= 0x41e2b))
+ /**
+ *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
+ */
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+ return err;
+}
+
+static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
+{
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
+ return false;
+
+ return true;
+}
+
+static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
+ char *chip_name)
+{
+ char fw_name[30];
+ int err;
+
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+
+ /* ignore failures to load */
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ if (!err) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
+ } else {
+ err = 0;
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ }
+ } else {
+ adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+ adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+ }
+
+ gfx_v9_0_check_if_need_gfxoff(adev);
+ gfx_v9_0_check_fw_write_wait(adev);
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ return err;
+}
+
+static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ int r;
+
+ DRM_DEBUG("\n");
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ /* No CPG in Arcturus */
+ if (adev->gfx.num_gfx_rings) {
+ r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t pg_always_on_cu_num = 2;
+ uint32_t always_on_cu_num;
+ uint32_t i, j, k;
+ uint32_t mask, cu_bitmap, counter;
+
+ if (adev->flags & AMD_IS_APU)
+ always_on_cu_num = 4;
+ else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
+ always_on_cu_num = 8;
+ else
+ always_on_cu_num = 12;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ cu_bitmap = 0;
+ counter = 0;
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ if (cu_info->bitmap[0][i][j] & mask) {
+ if (counter == pg_always_on_cu_num)
+ WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
+ if (counter < always_on_cu_num)
+ cu_bitmap |= mask;
+ else
+ break;
+ counter++;
+ }
+ mask <<= 1;
+ }
+
+ WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
+ cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
+ }
+ }
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
+
+ /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
+
+ /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+ /* set mmRLC_LB_PARAMS = 0x003F_1006 */
+ data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
+
+ /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
+ data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
+ data &= 0x0000FFFF;
+ data |= 0x00C00000;
+ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
+
+ /*
+ * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
+ * programmed in gfx_v9_0_init_always_on_cu_mask()
+ */
+
+ /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
+ * but used for RLC_LB_CNTL configuration */
+ data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_0_init_always_on_cu_mask(adev);
+}
+
+static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
+
+ /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
+
+ /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+ /* set mmRLC_LB_PARAMS = 0x003F_1006 */
+ data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
+
+ /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
+ data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
+ data &= 0x0000FFFF;
+ data |= 0x00C00000;
+ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
+
+ /*
+ * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
+ * programmed in gfx_v9_0_init_always_on_cu_mask()
+ */
+
+ /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
+ * but used for RLC_LB_CNTL configuration */
+ data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_0_init_always_on_cu_mask(adev);
+}
+
+static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
+{
+ WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
+}
+
+static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
+{
+ if (gfx_v9_0_load_mec2_fw_bin_support(adev))
+ return 5;
+ else
+ return 4;
+}
+
+static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx9_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->flags & AMD_IS_APU) {
+ /* TODO: double check the cp_table_size for RV */
+ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+ r = amdgpu_gfx_rlc_init_cpt(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ const __le32 *fw_data;
+ unsigned fw_size;
+ u32 *fw;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+ gfx_v9_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* type 1 wave data */
+ dst[(*no_fields)++] = 1;
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, simd, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc15_grbm_select(adev, me, pipe, q, vm, 0);
+}
+
+static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_0_select_se_sh,
+ .read_wave_data = &gfx_v9_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+};
+
+const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
+ .ras_error_inject = &gfx_v9_0_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
+};
+
+static struct amdgpu_gfx_ras gfx_v9_0_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_0_ras_ops,
+ },
+};
+
+static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+ int err;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case IP_VERSION(9, 2, 1):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
+ DRM_INFO("fix gfx.config for vega12\n");
+ break;
+ case IP_VERSION(9, 4, 0):
+ adev->gfx.ras = &gfx_v9_0_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ /* check vbios table if gpu info is not available */
+ err = amdgpu_atomfirmware_get_gfx_info(adev);
+ if (err)
+ return err;
+ break;
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
+ else
+ gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case IP_VERSION(9, 4, 1):
+ adev->gfx.ras = &gfx_v9_4_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ break;
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22010042;
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.ras = &gfx_v9_4_2_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ /* check vbios table if gpu info is not available */
+ err = amdgpu_atomfirmware_get_gfx_info(adev);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_BANKS);
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX9_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v9_0_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id;
+ struct amdgpu_ring *ring;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int hw_prio;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.mec.num_mec = 2;
+ break;
+ default:
+ adev->gfx.mec.num_mec = 1;
+ break;
+ }
+
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* ECC error */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ /* FUE error */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
+ }
+
+ r = gfx_v9_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+ if (!i)
+ sprintf(ring->name, "gfx");
+ else
+ sprintf(ring->name, "gfx_%d", i);
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+ /* disable scheduler on the real ring */
+ ring->no_scheduler = true;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ /* set up the software rings */
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ ring = &adev->gfx.sw_gfx_ring[i];
+ ring->ring_obj = NULL;
+ sprintf(ring->name, amdgpu_sw_ring_name(i));
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ ring->is_sw_ring = true;
+ hw_prio = amdgpu_sw_ring_priority(i);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+ NULL);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ }
+
+ /* init the muxer and add software rings */
+ r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
+ GFX9_NUM_SW_GFX_RINGS);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+ return r;
+ }
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+ r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
+ &adev->gfx.sw_gfx_ring[i]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+ return r;
+ }
+ }
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v9_0_compute_ring_init(adev,
+ ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[0];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
+ if (r)
+ return r;
+
+ adev->gfx.ce_ram_size = 0x8000;
+
+ r = gfx_v9_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int gfx_v9_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
+ amdgpu_ring_mux_fini(&adev->gfx.muxer);
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ gfx_v9_0_mec_fini(adev);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if (adev->flags & AMD_IS_APU) {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ }
+ gfx_v9_0_free_microcode(adev);
+
+ return 0;
+}
+
+
+static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ /* TODO */
+}
+
+void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
+ u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v9_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+}
+
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
+ /* CP and shaders */
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ access. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 1):
+ tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !READ_ONCE(adev->barrier_has_auto_waitcnt));
+ WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v9_0_tiling_mode_table_init(adev);
+
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
+ gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v9_0_init_compute_vmid(adev);
+ gfx_v9_0_init_gds_vmid(adev);
+ gfx_v9_0_init_sq_config(adev);
+}
+
+static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ if (k == adev->usec_timeout) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff,
+ 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ i, j);
+ return;
+ }
+ }
+ }
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* These interrupts should be enabled to drive DS clock */
+
+ tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+ if(adev->gfx.num_gfx_rings)
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
+
+ WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+ /* csib */
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+ adev->gfx.rlc.clear_state_size);
+}
+
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
+ int indirect_offset,
+ int list_size,
+ int *unique_indirect_regs,
+ int unique_indirect_reg_count,
+ int *indirect_start_offsets,
+ int *indirect_start_offsets_count,
+ int max_start_offsets_count)
+{
+ int idx;
+
+ for (; indirect_offset < list_size; indirect_offset++) {
+ WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
+ indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+ *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
+
+ while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+ indirect_offset += 2;
+
+ /* look for the matching indice */
+ for (idx = 0; idx < unique_indirect_reg_count; idx++) {
+ if (unique_indirect_regs[idx] ==
+ register_list_format[indirect_offset] ||
+ !unique_indirect_regs[idx])
+ break;
+ }
+
+ BUG_ON(idx >= unique_indirect_reg_count);
+
+ if (!unique_indirect_regs[idx])
+ unique_indirect_regs[idx] = register_list_format[indirect_offset];
+
+ indirect_offset++;
+ }
+ }
+}
+
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
+{
+ int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+ int unique_indirect_reg_count = 0;
+
+ int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+ int indirect_start_offsets_count = 0;
+
+ int list_size = 0;
+ int i = 0, j = 0;
+ u32 tmp = 0;
+
+ u32 *register_list_format =
+ kmemdup(adev->gfx.rlc.register_list_format,
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+ if (!register_list_format)
+ return -ENOMEM;
+
+ /* setup unique_indirect_regs array and indirect_start_offsets array */
+ unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+ gfx_v9_1_parse_ind_reg_list(register_list_format,
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+ adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+ unique_indirect_regs,
+ unique_indirect_reg_count,
+ indirect_start_offsets,
+ &indirect_start_offsets_count,
+ ARRAY_SIZE(indirect_start_offsets));
+
+ /* enable auto inc in case it is disabled */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+
+ /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
+ RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
+ for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
+ adev->gfx.rlc.register_restore[i]);
+
+ /* load indirect register */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+ adev->gfx.rlc.reg_list_format_start);
+
+ /* direct register portion */
+ for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+ register_list_format[i]);
+
+ /* indirect register portion */
+ while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+ if (register_list_format[i] == 0xFFFFFFFF) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ continue;
+ }
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+
+ for (j = 0; j < unique_indirect_reg_count; j++) {
+ if (register_list_format[i] == unique_indirect_regs[j]) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+ break;
+ }
+ }
+
+ BUG_ON(j >= unique_indirect_reg_count);
+
+ i++;
+ }
+
+ /* set save/restore list size */
+ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
+ list_size = list_size >> 1;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+ adev->gfx.rlc.reg_restore_list_size);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
+
+ /* write the starting offsets to RLC scratch ram */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+ adev->gfx.rlc.starting_offsets_start);
+ for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+ indirect_start_offsets[i]);
+
+ /* load unique indirect regs*/
+ for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
+ if (unique_indirect_regs[i] != 0) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+ + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+ unique_indirect_regs[i] & 0x3FFFF);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+ + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+ unique_indirect_regs[i] >> 20);
+ }
+ }
+
+ kfree(register_list_format);
+ return 0;
+}
+
+static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data = 0;
+ uint32_t default_data = 0;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
+ if (enable) {
+ /* enable GFXIP control over CGPG */
+ data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+
+ /* update status */
+ data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
+ data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+ } else {
+ /* restore GFXIP control over GCPG */
+ data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+ }
+}
+
+static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG)) {
+ /* init IDLE_POLL_COUNT = 60 */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+ data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+ data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+
+ /* init RLC PG Delay */
+ data = 0;
+ data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
+ data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
+ data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
+ data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
+ data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
+ data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
+ data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
+ data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
+ data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+
+ /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
+ data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
+ pwr_10_0_gfxip_control_over_cgpg(adev, true);
+ }
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data = 0;
+ uint32_t default_data = 0;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
+ enable ? 1 : 0);
+ if (default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data = 0;
+ uint32_t default_data = 0;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
+ enable ? 1 : 0);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data = 0;
+ uint32_t default_data = 0;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ CP_PG_DISABLE,
+ enable ? 0 : 1);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, default_data;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ GFX_POWER_GATING_ENABLE,
+ enable ? 1 : 0);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, default_data;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ GFX_PIPELINE_PG_ENABLE,
+ enable ? 1 : 0);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+
+ if (!enable)
+ /* read any GFX register to wake up GFX */
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
+}
+
+static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, default_data;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ STATIC_PER_CU_PG_ENABLE,
+ enable ? 1 : 0);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, default_data;
+
+ default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+ data = REG_SET_FIELD(data, RLC_PG_CNTL,
+ DYN_PER_CU_PG_ENABLE,
+ enable ? 1 : 0);
+ if(default_data != data)
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
+{
+ gfx_v9_0_init_csb(adev);
+
+ /*
+ * Rlc save restore list is workable since v2_1.
+ * And it's needed by gfxoff feature.
+ */
+ if (adev->gfx.rlc.is_rlc_v2_1) {
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
+ gfx_v9_1_init_rlc_save_restore_list(adev);
+ gfx_v9_0_enable_save_restore_machine(adev);
+ }
+
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
+ WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
+ adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v9_0_init_gfx_power_gating(adev);
+ }
+}
+
+static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
+ gfx_v9_0_enable_gui_idle_interrupt(adev, false);
+ gfx_v9_0_wait_for_rlc_serdes(adev);
+}
+
+static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
+{
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ u32 rlc_ucode_ver;
+#endif
+
+ WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU)) {
+ gfx_v9_0_enable_gui_idle_interrupt(adev, true);
+ udelay(50);
+ }
+
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ /* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
+ if(rlc_ucode_ver == 0x108) {
+ DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ * default is 0x9C4 to create a 100us interval */
+ WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
+ /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ * to disable the page fault retry interrupts, default is
+ * 0x100 (256) */
+ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
+ }
+#endif
+}
+
+static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v9_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
+
+ gfx_v9_0_init_pg(adev);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* legacy rlc firmware loading */
+ r = gfx_v9_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ gfx_v9_0_init_lbpw(adev);
+ if (amdgpu_lbpw == 0)
+ gfx_v9_0_enable_lbpw(adev, false);
+ else
+ gfx_v9_0_enable_lbpw(adev, true);
+ break;
+ case IP_VERSION(9, 4, 0):
+ gfx_v9_4_init_lbpw(adev);
+ if (amdgpu_lbpw > 0)
+ gfx_v9_0_enable_lbpw(adev, true);
+ else
+ gfx_v9_0_enable_lbpw(adev, false);
+ break;
+ default:
+ break;
+ }
+
+ gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
+
+ adev->gfx.rlc.funcs->start(adev);
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
+ udelay(50);
+}
+
+static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ gfx_v9_0_cp_gfx_enable(adev, false);
+
+ /* PFP */
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
+ WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ /* CE */
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
+ WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
+ WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
+
+ /* ME */
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
+ WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
+ WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i, tmp;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
+
+ gfx_v9_0_cp_gfx_enable(adev, true);
+
+ r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring,
+ ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
+ tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+
+
+ /* start the ring */
+ gfx_v9_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable) {
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
+ } else {
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v9_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
+ adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
+ mec_hdr->jt_offset);
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
+ adev->gfx.mec_fw_version);
+ /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ return 0;
+}
+
+/* KIQ functions */
+static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+}
+
+static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+ }
+}
+
+static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ mqd->dynamic_cu_mask_addr_lo =
+ lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi =
+ upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a queue/ring */
+ gfx_v9_0_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ /* If GC has entered CGPG, ringing doorbell > first page
+ * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
+ * workaround this issue. And this change has to align with firmware
+ * update.
+ */
+ if (check_if_enlarge_doorbell_range(adev))
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell.size - 4));
+ else
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int j;
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (j == AMDGPU_MAX_USEC_TIMEOUT) {
+ DRM_DEBUG("KIQ dequeue request failed.\n");
+
+ /* Manual disable if dequeue request times out */
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
+ }
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ 0);
+ }
+
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ struct v9_mqd *tmp_mqd;
+
+ gfx_v9_0_kiq_setting(ring);
+
+ /* GPU could be in bad state during probe, driver trigger the reset
+ * after load the SMU, in this case , the mqd is not be initialized.
+ * driver need to re-init the mqd.
+ * check mqd->cp_hqd_pq_control since this value should not be 0
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
+ if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
+ /* for GPU_RESET case , reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ gfx_v9_0_kiq_init_register(ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ gfx_v9_0_mqd_init(ring);
+ gfx_v9_0_kiq_init_register(ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
+
+ /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
+ * is not be initialized before
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ if (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ gfx_v9_0_mqd_init(ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v9_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v9_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v9_0_kcq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev, 0);
+done:
+ return r;
+}
+
+static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v9_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.num_gfx_rings) {
+ /* legacy firmware loading */
+ r = gfx_v9_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ if (adev->gfx.num_gfx_rings) {
+ r = gfx_v9_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (adev->gfx.num_gfx_rings) {
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ amdgpu_ring_test_helper(ring);
+ }
+
+ gfx_v9_0_enable_gui_idle_interrupt(adev, true);
+
+ return 0;
+}
+
+static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
+ adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
+ adev->df.hash_status.hash_64k);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
+ adev->df.hash_status.hash_2m);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
+ adev->df.hash_status.hash_1g);
+ WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
+}
+
+static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_cp_gfx_enable(adev, enable);
+ gfx_v9_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v9_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfx_v9_0_init_golden_registers(adev);
+
+ gfx_v9_0_constants_init(adev);
+
+ gfx_v9_0_init_tcp_config(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ gfx_v9_4_2_set_power_brake_sequence(adev);
+
+ return r;
+}
+
+static int gfx_v9_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ /* DF freeze and kcq disable will fail */
+ if (!amdgpu_ras_intr_triggered())
+ /* disable KCQ to avoid CPC touch memory not valid anymore */
+ amdgpu_gfx_disable_kcq(adev, 0);
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ /* must disable polling for SRIOV when hw finished, otherwise
+ * CPC engine may still keep fetching WB address which is already
+ * invalid after sw finished and trigger DMAR reading error in
+ * hypervisor side.
+ */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ return 0;
+ }
+
+ /* Use deinitialize sequence from CAIL when unbinding device from driver,
+ * otherwise KIQ is hanging when binding back
+ */
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
+ adev->gfx.kiq[0].ring.pipe,
+ adev->gfx.kiq[0].ring.queue, 0, 0);
+ gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ gfx_v9_0_cp_enable(adev, false);
+
+ /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
+ if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
+ (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
+ dev_dbg(adev->dev, "Skipping RLC halt\n");
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+ return 0;
+}
+
+static int gfx_v9_0_suspend(void *handle)
+{
+ return gfx_v9_0_hw_fini(handle);
+}
+
+static int gfx_v9_0_resume(void *handle)
+{
+ return gfx_v9_0_hw_init(handle);
+}
+
+static bool gfx_v9_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v9_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v9_0_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v9_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ if (adev->gfx.num_gfx_rings)
+ /* Disable GFX parsing/prefetching */
+ gfx_v9_0_cp_gfx_enable(adev, false);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq, reg_val_offs = 0;
+ uint64_t value = 0;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 9 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 16) | /* count sel */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev)))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ mb();
+ value = (uint64_t)adev->wb.wb[reg_val_offs] |
+ (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ pr_err("failed to read gpu clock\n");
+ return ~0;
+}
+
+static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock, clock_lo, clock_hi, hi_check;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 3, 0):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ default:
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
+ clock = gfx_v9_0_kiq_read_clock(adev);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ }
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ break;
+ }
+ return clock;
+}
+
+static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v9_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static const u32 vgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x000000f8, 0xbf110800,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0x7e060280,
+ 0x7e080280, 0x7e0a0280,
+ 0x7e0c0280, 0x7e0e0280,
+ 0x80808800, 0xbe803200,
+ 0xbf84fff5, 0xbf9c0000,
+ 0xd28c0001, 0x0001007f,
+ 0xd28d0001, 0x0002027e,
+ 0x10020288, 0xb8810904,
+ 0xb7814000, 0xd1196a01,
+ 0x00000301, 0xbe800087,
+ 0xbefc00c1, 0xd89c4000,
+ 0x00020201, 0xd89cc080,
+ 0x00040401, 0x320202ff,
+ 0x00000800, 0x80808100,
+ 0xbf84fff8, 0x7e020280,
+ 0xbf810000, 0x00000000,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x0000005f, 0xbee50080,
+ 0xbe812c65, 0xbe822c65,
+ 0xbe832c65, 0xbe842c65,
+ 0xbe852c65, 0xb77c0005,
+ 0x80808500, 0xbf84fff8,
+ 0xbe800080, 0xbf810000,
+};
+
+static const u32 vgpr_init_compute_shader_arcturus[] = {
+ 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
+ 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
+ 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
+ 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
+ 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
+ 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
+ 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
+ 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
+ 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
+ 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
+ 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
+ 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
+ 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
+ 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
+ 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
+ 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
+ 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
+ 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
+ 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
+ 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
+ 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
+ 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
+ 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
+ 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
+ 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
+ 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
+ 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
+ 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
+ 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
+ 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
+ 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
+ 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
+ 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
+ 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
+ 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
+ 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
+ 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
+ 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
+ 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
+ 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
+ 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
+ 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
+ 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
+ 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
+ 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
+ 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
+ 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
+ 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
+ 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
+ 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
+ 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
+ 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
+ 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
+ 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
+ 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
+ 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
+ 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
+ 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
+ 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
+ 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
+ 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
+ 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
+ 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
+ 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
+ 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
+ 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
+ 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
+ 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
+ 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
+ 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
+ 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
+ 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
+ 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
+ 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
+ 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
+ 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
+ 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
+ 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
+ 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
+ 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
+ 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
+ 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
+ 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
+ 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
+ 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
+ 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
+ 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
+ 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
+ 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
+ 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
+ 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
+ 0xbf84fff8, 0xbf810000,
+};
+
+/* When below register arrays changed, please update gpr_reg_size,
+ and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
+ to cover all gfx9 ASICs */
+static const struct soc15_reg_entry vgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const struct soc15_reg_entry sgpr1_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
+};
+
+static const struct soc15_reg_entry sgpr2_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
+};
+
+static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
+};
+
+static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ int i, r;
+
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 7);
+ if (r) {
+ DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
+ ring->name, r);
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
+ amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+ PACKET3_DMA_DATA_DST_SEL(1) |
+ PACKET3_DMA_DATA_SRC_SEL(2) |
+ PACKET3_DMA_DATA_ENGINE(0)));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
+ adev->gds.gds_size);
+
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
+
+ return r;
+}
+
+static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ int r, i;
+ unsigned total_size, vgpr_offset, sgpr_offset;
+ u64 gpu_addr;
+
+ int compute_dim_x = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se;
+ int sgpr_work_group_size = 5;
+ int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
+ int vgpr_init_shader_size;
+ const u32 *vgpr_init_shader_ptr;
+ const struct soc15_reg_entry *vgpr_init_regs_ptr;
+
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ /* bail if the compute ring is not ready */
+ if (!ring->sched.ready)
+ return 0;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
+ vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
+ vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
+ } else {
+ vgpr_init_shader_ptr = vgpr_init_compute_shader;
+ vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
+ vgpr_init_regs_ptr = vgpr_init_regs;
+ }
+
+ total_size =
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
+ total_size +=
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
+ total_size +=
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
+ total_size = ALIGN(total_size, 256);
+ vgpr_offset = total_size;
+ total_size += ALIGN(vgpr_init_shader_size, 256);
+ sgpr_offset = total_size;
+ total_size += sizeof(sgpr_init_compute_shader);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
+ ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
+
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+ ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+ /* init the ib length to 0 */
+ ib.length_dw = 0;
+
+ /* VGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < gpr_reg_size; i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR1 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < gpr_reg_size; i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR2 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < gpr_reg_size; i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r) {
+ DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(f, false);
+ if (r) {
+ DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ goto fail;
+ }
+
+fail:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+
+ return r;
+}
+
+static int gfx_v9_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ adev->gfx.num_gfx_rings = 0;
+ else
+ adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.xcc_mask = 1;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ gfx_v9_0_set_kiq_pm4_funcs(adev);
+ gfx_v9_0_set_ring_funcs(adev);
+ gfx_v9_0_set_irq_funcs(adev);
+ gfx_v9_0_set_gds_init(adev);
+ gfx_v9_0_set_rlc_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_0_init_microcode(adev);
+}
+
+static int gfx_v9_0_ecc_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ /*
+ * Temp workaround to fix the issue that CP firmware fails to
+ * update read pointer when CPDMA is writing clearing operation
+ * to GDS in suspend/resume sequence on several cards. So just
+ * limit this operation in cold boot sequence.
+ */
+ if ((!adev->in_suspend) &&
+ (adev->gds.gds_size)) {
+ r = gfx_v9_0_do_edc_gds_workarounds(adev);
+ if (r)
+ return r;
+ }
+
+ /* requires IBs so do in late init after IB pool is initialized */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+ else
+ r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
+ if (r)
+ return r;
+
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
+
+ return 0;
+}
+
+static int gfx_v9_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_ecc_late_init(handle);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ gfx_v9_4_2_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+ else
+ gfx_v9_0_debug_trap_config_init(adev,
+ adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
+
+ return 0;
+}
+
+static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return false;
+
+ return true;
+}
+
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+}
+
+static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+ gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
+ } else {
+ gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ /* TODO: double check if we need to perform under safe mode */
+ /* gfx_v9_0_enter_rlc_safe_mode(adev); */
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
+ else
+ gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+ gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+ else
+ gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+ /* gfx_v9_0_exit_rlc_safe_mode(adev); */
+}
+
+static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ /* only for Vega10 & Raven1 */
+ data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!adev->gfx.num_gfx_rings)
+ return;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* Enable 3D CGCG/CGLS */
+ if (enable) {
+ /* write cmd to clear cgcg/cgls ov */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable 3Dcgcg FSM(0x0000363f) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ else
+ data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ /* Disable CGCG/CGLS */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ /* disable cgcg, cgls should be disabled */
+ data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ else
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (enable) {
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v9_0_update_3d_clock_gating(adev, enable);
+ /* === CGCG + CGLS === */
+ gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v9_0_update_3d_clock_gating(adev, enable);
+ /* === MGCG + MGLS === */
+ gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
+ }
+ return 0;
+}
+
+static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
+{
+ u32 reg, data;
+
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+}
+
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v9_0_update_spm_vmid_internal(adev, vmid);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v9_0_check_rlcg_range(adev, offset,
+ (void *)rlcg_access_gc_9_0,
+ ARRAY_SIZE(rlcg_access_gc_9_0));
+}
+
+static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v9_0_set_safe_mode,
+ .unset_safe_mode = gfx_v9_0_unset_safe_mode,
+ .init = gfx_v9_0_rlc_init,
+ .get_csb_size = gfx_v9_0_get_csb_size,
+ .get_csb_buffer = gfx_v9_0_get_csb_buffer,
+ .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
+ .resume = gfx_v9_0_rlc_resume,
+ .stop = gfx_v9_0_rlc_stop,
+ .reset = gfx_v9_0_rlc_reset,
+ .start = gfx_v9_0_rlc_start,
+ .update_spm_vmid = gfx_v9_0_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
+};
+
+static int gfx_v9_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+ gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
+ gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
+ } else {
+ gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
+ gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+ gfx_v9_0_enable_cp_power_gating(adev, true);
+ else
+ gfx_v9_0_enable_cp_power_gating(adev, false);
+
+ /* update gfx cgpg state */
+ gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
+
+ /* update mgcg state */
+ gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+ break;
+ case IP_VERSION(9, 2, 1):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+ }
+}
+
+static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
+}
+
+static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ }
+}
+
+static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
+ gfx_v9_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ?
+ true : false,
+ job->gds_size > 0 && job->gds_base != 0);
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_ib_on_emit_cntl(ring);
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ u32 control = ring->ring[offset];
+
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+ ring->ring[offset] = control;
+}
+
+static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *ce_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_ce_ib_state);
+
+ if (ring->is_mes_queue) {
+ payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+ } else {
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+ }
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
+static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *de_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_de_ib_state);
+
+ if (ring->is_mes_queue) {
+ payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+ } else {
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+ }
+
+ ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
+ IB_COMPLETION_STATUS_PREEMPTED;
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
+static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+ uint32_t dw2 = 0;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+
+ if (writeback) {
+ dw2 = EOP_TC_NC_ACTION_EN;
+ } else {
+ dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+ EOP_TC_MD_ACTION_EN;
+ }
+ dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5);
+ if (exec)
+ dw2 |= EOP_EXEC;
+
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
+}
+
+static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else{
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_ce_ib_state ce_payload = {0};
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
+ int cnt;
+
+ cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ce_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_ce(ring);
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 13);
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ring->trail_seq);
+
+ amdgpu_ring_commit(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*ring->trail_fence_cpu_addr))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+ }
+
+ /*reset the CP_VMID_PREEMPT after trailing fence*/
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
+ amdgpu_ring_commit(ring);
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ if (ring->is_mes_queue) {
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr =
+ amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gds_backup) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ } else {
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+ }
+
+ if (usegds) {
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ }
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ amdgpu_ring_ib_on_emit_de(ring);
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ gfx_v9_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) &&
+ flags & AMDGPU_IB_PREEMPTED) ? true : false);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ return ret;
+}
+
+static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
+}
+
+static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
+ adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
+static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
+ WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
+ CP_ECC_ERROR_INT_ENABLE, 1)
+
+#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
+ WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
+ CP_ECC_ERROR_INT_ENABLE, 0)
+
+static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ CP_ECC_ERROR_INT_ENABLE, 0);
+ DISABLE_ECC_ON_ME_PIPE(1, 0);
+ DISABLE_ECC_ON_ME_PIPE(1, 1);
+ DISABLE_ECC_ON_ME_PIPE(1, 2);
+ DISABLE_ECC_ON_ME_PIPE(1, 3);
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ CP_ECC_ERROR_INT_ENABLE, 1);
+ ENABLE_ECC_ON_ME_PIPE(1, 0);
+ ENABLE_ECC_ON_ME_PIPE(1, 1);
+ ENABLE_ECC_ON_ME_PIPE(1, 2);
+ ENABLE_ECC_ON_ME_PIPE(1, 3);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (adev->gfx.num_gfx_rings &&
+ !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+ /* Fence signals are handled on the software rings*/
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_0_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v9_0_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v9_0_fault(adev, entry);
+ return 0;
+}
+
+
+static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
+ },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
+ },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
+ 0, 0
+ },
+ { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
+ 0, 0
+ },
+ { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
+ },
+ { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
+ },
+ { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
+ 0, 0
+ },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
+ },
+ { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
+ 0, 0
+ },
+ { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
+ },
+ { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
+ },
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
+ },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
+ },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
+ },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
+ },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
+ },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
+ },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ 0, 0
+ },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
+ },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
+ },
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
+ },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
+ },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
+ },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
+ },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
+ },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
+ },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
+ },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
+ },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
+ },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
+ },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
+ },
+ { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ 0, 0
+ }
+};
+
+static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
+ return -EINVAL;
+
+ if (!ras_gfx_subblocks[info->head.sub_block_index].name)
+ return -EPERM;
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
+ return -EPERM;
+ }
+
+ if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
+ info->head.type)) {
+ DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
+ ras_gfx_subblocks[info->head.sub_block_index].name,
+ info->head.type);
+ return -EPERM;
+ }
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index =
+ ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
+
+static const char *vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+};
+
+static const char *vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_LOG_FIFO",
+};
+
+static const char *atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char *atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, atc_l2_cache_2m_mems[i],
+ sec_count);
+ err_data->ce_count += sec_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "SEC %d\n", i, atc_l2_cache_4k_mems[i],
+ sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = (data & 0x00018000L) >> 0xf;
+ if (ded_count) {
+ dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
+ "DED %d\n", i, atc_l2_cache_4k_mems[i],
+ ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id, uint32_t value,
+ uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
+ if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_0_ras_fields[i].seg != reg->seg ||
+ gfx_v9_0_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value &
+ gfx_v9_0_ras_fields[i].sec_count_mask) >>
+ gfx_v9_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "GFX SubBlock %s, "
+ "Instance[%d][%d], SEC %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ gfx_v9_0_ras_fields[i].ded_count_mask) >>
+ gfx_v9_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev, "GFX SubBlock %s, "
+ "Instance[%d][%d], DED %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ /* read back registers to clear the counters */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+}
+
+static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_0_ras_error_count(adev,
+ &gfx_v9_0_edc_counter_regs[i],
+ j, k, reg_value,
+ &sec_count, &ded_count);
+ }
+ }
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_0_query_utc_edc_status(adev, err_data);
+}
+
+static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
+ uint32_t pipe, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ uint32_t wcl_cs_reg;
+
+ /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+ val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
+
+ switch (pipe) {
+ case 0:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
+ break;
+ case 1:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
+ break;
+ case 2:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
+ break;
+ case 3:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+
+ amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+
+}
+static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ int i;
+
+
+ /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
+ val);
+
+ /* Restrict waves for normal/low priority compute queues as well
+ * to get best QoS for high priority compute jobs.
+ *
+ * amdgpu controls only 1st ME(0-3 CS pipes).
+ */
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ if (i != ring->pipe)
+ gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
+
+ }
+}
+
+static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
+ .name = "gfx_v9_0",
+ .early_init = gfx_v9_0_early_init,
+ .late_init = gfx_v9_0_late_init,
+ .sw_init = gfx_v9_0_sw_init,
+ .sw_fini = gfx_v9_0_sw_fini,
+ .hw_init = gfx_v9_0_hw_init,
+ .hw_fini = gfx_v9_0_hw_fini,
+ .suspend = gfx_v9_0_suspend,
+ .resume = gfx_v9_0_resume,
+ .is_idle = gfx_v9_0_is_idle,
+ .wait_for_idle = gfx_v9_0_wait_for_idle,
+ .soft_reset = gfx_v9_0_soft_reset,
+ .set_clockgating_state = gfx_v9_0_set_clockgating_state,
+ .set_powergating_state = gfx_v9_0_set_powergating_state,
+ .get_clockgating_state = gfx_v9_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ the first COND_EXEC jump to the place just
+ prior to this double SWITCH_BUFFER */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7, /* gfx_v9_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+ .preempt_ib = gfx_v9_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+ .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+ .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place just
+ * prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7, /* gfx_v9_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_sw_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .patch_cntl = gfx_v9_0_ring_patch_cntl,
+ .patch_de = gfx_v9_0_ring_patch_de_meta,
+ .patch_ce = gfx_v9_0_ring_patch_ce_meta,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_0_ring_emit_gds_switch */
+ 7 + /* gfx_v9_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
+ 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_0_emit_wave_limit,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_0_ring_emit_gds_switch */
+ 7 + /* gfx_v9_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_0_ring_emit_rreg,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+
+ if (adev->gfx.num_gfx_rings) {
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
+ .set = gfx_v9_0_set_eop_interrupt_state,
+ .process = gfx_v9_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
+ .set = gfx_v9_0_set_priv_reg_fault_state,
+ .process = gfx_v9_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
+ .set = gfx_v9_0_set_priv_inst_fault_state,
+ .process = gfx_v9_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
+ .set = gfx_v9_0_set_cp_ecc_error_state,
+ .process = amdgpu_gfx_cp_ecc_error_irq,
+};
+
+
+static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
+
+ adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
+ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
+}
+
+static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ adev->gds.gds_size = 0x10000;
+ break;
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 4, 1):
+ adev->gds.gds_size = 0x1000;
+ break;
+ case IP_VERSION(9, 4, 2):
+ /* aldebaran removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+ adev->gds.gds_size = 0;
+ break;
+ default:
+ adev->gds.gds_size = 0x10000;
+ break;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 4, 0):
+ adev->gds.gds_compute_max_wave_id = 0x7ff;
+ break;
+ case IP_VERSION(9, 2, 1):
+ adev->gds.gds_compute_max_wave_id = 0x27f;
+ break;
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
+ else
+ adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
+ break;
+ case IP_VERSION(9, 4, 1):
+ adev->gds.gds_compute_max_wave_id = 0xfff;
+ break;
+ case IP_VERSION(9, 4, 2):
+ /* deprecated for Aldebaran, no usage at all */
+ adev->gds.gds_compute_max_wave_id = 0;
+ break;
+ default:
+ /* this really depends on the chip */
+ adev->gds.gds_compute_max_wave_id = 0x7ff;
+ break;
+ }
+
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned disable_masks[4 * 4];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
+ gfx_v9_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
+ bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
+
+ /*
+ * The bitmap(and ao_cu_bitmap) in cu_info structure is
+ * 4x4 size array, and it's usually suitable for Vega
+ * ASICs which has 4*2 SE/SH layout.
+ * But for Arcturus, SE/SH layout is changed to 8*1.
+ * To mostly reduce the impact, we make it compatible
+ * with current bitmap array as below:
+ * SE4,SH0 --> bitmap[0][1]
+ * SE5,SH0 --> bitmap[1][1]
+ * SE6,SH0 --> bitmap[2][1]
+ * SE7,SH0 --> bitmap[3][1]
+ */
+ cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter ++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
+ }
+ }
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v9_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
new file mode 100644
index 000000000..f9f6edc5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_0_H__
+#define __GFX_V9_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
+
+void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
+ u32 instance, int xcc_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
new file mode 100644
index 000000000..bc8416afb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_pm.h"
+
+#include "gc/gc_9_4_1_offset.h"
+#include "gc/gc_9_4_1_sh_mask.h"
+#include "soc15_common.h"
+
+#include "gfx_v9_4.h"
+#include "amdgpu_ras.h"
+
+static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = {
+ /* CPC */
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 },
+ /* DC */
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 },
+ /* CPF */
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 },
+ /* GDS */
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 },
+ /* SPI */
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 },
+ /* SQ */
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 8, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 8, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 8, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 8, 16 },
+ /* SQC */
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 },
+ /* TA */
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 },
+ /* TCA */
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 },
+ /* TCC */
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 },
+ /* TCI */
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 },
+ /* TCP */
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 },
+ /* TD */
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 },
+ /* GCEA */
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 },
+ /* RLC */
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 },
+};
+
+static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = {
+ /* CPC */
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_RESTORE_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) },
+ { "CPC_DC_RESTORE_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) },
+
+ /* CPF */
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) },
+ { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) },
+
+ /* GDS */
+ { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+
+ /* SPI */
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) },
+ { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) },
+ { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) },
+ { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) },
+ { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) },
+
+ /* SQ */
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+
+ /* SQC */
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+
+ /* TA */
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) },
+
+ /* TCA */
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) },
+
+ /* TCC */
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) },
+
+ /* TCI */
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) },
+
+ /* TCP */
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+
+ /* TD */
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) },
+
+ /* EA */
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) },
+ { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) },
+ { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) },
+ { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) },
+ { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) },
+ { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) },
+ { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) },
+ { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) },
+ { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) },
+
+ /* RLC */
+ { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) },
+ { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) },
+ { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) },
+ { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) },
+ { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) },
+ { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) },
+ { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) },
+};
+
+static const char * const vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+ "UTC_VML2_IFIFO_GROUP0",
+ "UTC_VML2_IFIFO_GROUP1",
+ "UTC_VML2_IFIFO_GROUP2",
+ "UTC_VML2_IFIFO_GROUP3",
+ "UTC_VML2_IFIFO_GROUP4",
+ "UTC_VML2_IFIFO_GROUP5",
+ "UTC_VML2_IFIFO_GROUP6",
+ "UTC_VML2_IFIFO_GROUP7",
+ "UTC_VML2_IFIFO_GROUP8",
+ "UTC_VML2_IFIFO_GROUP9",
+ "UTC_VML2_IFIFO_GROUP10",
+ "UTC_VML2_IFIFO_GROUP11",
+ "UTC_VML2_IFIFO_GROUP12",
+ "UTC_VML2_IFIFO_GROUP13",
+ "UTC_VML2_IFIFO_GROUP14",
+ "UTC_VML2_IFIFO_GROUP15",
+ "UTC_VML2_IFIFO_GROUP16",
+ "UTC_VML2_IFIFO_GROUP17",
+ "UTC_VML2_IFIFO_GROUP18",
+ "UTC_VML2_IFIFO_GROUP19",
+ "UTC_VML2_IFIFO_GROUP20",
+ "UTC_VML2_IFIFO_GROUP21",
+ "UTC_VML2_IFIFO_GROUP22",
+ "UTC_VML2_IFIFO_GROUP23",
+ "UTC_VML2_IFIFO_GROUP24",
+};
+
+static const char * const vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_ARADDRS",
+ "UTC_VML2_RDIF_LOG_FIFO",
+ "UTC_VML2_QUEUE_REQ",
+ "UTC_VML2_QUEUE_RET",
+};
+
+static const char * const utcl2_router_mems[] = {
+ "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0",
+ "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1",
+ "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2",
+ "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3",
+ "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4",
+ "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5",
+ "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6",
+ "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7",
+ "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8",
+ "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9",
+ "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10",
+ "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11",
+ "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12",
+ "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13",
+ "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14",
+ "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15",
+ "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16",
+ "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17",
+ "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18",
+ "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19",
+ "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20",
+ "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21",
+ "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22",
+ "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23",
+ "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24",
+};
+
+static const char * const atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char * const atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
+ utcl2_router_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
+ utcl2_router_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_2m_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_2m_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_4k_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ dev_info(adev->dev,
+ "Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_4k_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id,
+ uint32_t value, uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) {
+ if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_4_ras_fields[i].seg != reg->seg ||
+ gfx_v9_4_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >>
+ gfx_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >>
+ gfx_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0, k);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_4_ras_error_count(adev,
+ &gfx_v9_4_edc_counter_regs[i],
+ j, k, reg_value, &sec_count,
+ &ded_count);
+ }
+ }
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_4_query_utc_edc_status(adev, err_data);
+
+}
+
+static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+}
+
+static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
+
+static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < gfx_v9_4_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_ea_err_status_regs.instance;
+ j++) {
+ gfx_v9_4_select_se_sh(adev, i, 0, j);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_ea_err_status_regs));
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ /* SDP read/write error/parity error in FUE_IS_FATAL mode
+ * can cause system fatal error in arcturas. Harvest the error
+ * status before GPU reset */
+ dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
+ j, reg_value);
+ }
+ }
+ }
+
+ gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+
+const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_query_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_ras_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
new file mode 100644
index 000000000..ca520a767
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_H__
+#define __GFX_V9_4_H__
+
+extern struct amdgpu_gfx_ras gfx_v9_4_ras;
+
+#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
new file mode 100644
index 000000000..63f6843a0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -0,0 +1,1940 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include "gfx_v9_0.h"
+
+#include "gfx_v9_4_2.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_gfx.h"
+
+#define SE_ID_MAX 8
+#define CU_ID_MAX 16
+#define SIMD_ID_MAX 4
+#define WAVE_ID_MAX 10
+
+enum gfx_v9_4_2_utc_type {
+ VML2_MEM,
+ VML2_WALKER_MEM,
+ UTCL2_MEM,
+ ATC_L2_CACHE_2M,
+ ATC_L2_CACHE_32K,
+ ATC_L2_CACHE_4K
+};
+
+struct gfx_v9_4_2_utc_block {
+ enum gfx_v9_4_2_utc_type type;
+ uint32_t num_banks;
+ uint32_t num_ways;
+ uint32_t num_mem_blocks;
+ struct soc15_reg idx_reg;
+ struct soc15_reg data_reg;
+ uint32_t sec_count_mask;
+ uint32_t sec_count_shift;
+ uint32_t ded_count_mask;
+ uint32_t ded_count_shift;
+ uint32_t clear;
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde_die_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_0, 0x3fffffff, 0x141dc920),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_1, 0x3fffffff, 0x3b458b93),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_2, 0x3fffffff, 0x1a4f5583),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_3, 0x3fffffff, 0x317717f6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_4, 0x3fffffff, 0x107cc1e6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_5, 0x3ff, 0x351),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde_die_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_0, 0x3fffffff, 0x2591aa38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_1, 0x3fffffff, 0xac9e88b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_2, 0x3fffffff, 0x2bc3369b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_3, 0x3fffffff, 0xfb74ee),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_4, 0x3fffffff, 0x21f0a2fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CHAN_STEER_5, 0x3ff, 0x49),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_UTCL1_CNTL1, 0xffffffff, 0x30800400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
+};
+
+/*
+ * This shader is used to clear VGPRS and LDS, and also write the input
+ * pattern into the write back buffer, which will be used by driver to
+ * check whether all SIMDs have been covered.
+*/
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
+ 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
+ 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
+ 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
+ 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
+ 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
+ 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
+ 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
+ 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
+ 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
+ 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
+ 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
+ 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
+ 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
+ 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
+ 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
+ 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
+ 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
+ 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
+ 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
+ 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
+ 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
+ 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
+ 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
+ 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
+ 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
+ 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
+ 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
+ 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
+ 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
+ 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
+ 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
+ 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
+ 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
+ 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
+ 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
+ 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
+ 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
+ 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
+ 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
+ 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
+ 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
+ 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
+ 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
+ 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
+ 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
+ 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
+ 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
+ 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
+ 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
+ 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
+ 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
+ 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
+ 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
+ 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
+ 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
+ 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
+ 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
+ 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
+ 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
+ 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
+ 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
+ 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
+ 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
+ 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
+ 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
+ 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
+ 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
+ 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
+ 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
+ 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
+ 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
+ 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
+ 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
+ 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
+ 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
+ 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
+ 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
+ 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
+ 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
+ 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
+ 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
+ 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
+ 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
+ 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
+ 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
+ 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
+ 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
+ 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
+ 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
+ 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
+ 0xbf810000,
+};
+
+const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * The below shaders are used to clear SGPRS, and also write the input
+ * pattern into the write back buffer. The first two dispatch should be
+ * scheduled simultaneously which make sure that all SGPRS could be
+ * allocated, so the dispatch 1 need check write back buffer before scheduled,
+ * make sure that waves of dispatch 0 are all dispacthed to all simds
+ * balanced. both dispatch 0 and dispatch 1 should be halted until all waves
+ * are dispatched, and then driver write a pattern to the shared memory to make
+ * all waves continue.
+*/
+static const u32 sgpr112_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
+ 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
+ 0xbf810000
+};
+
+const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const u32 sgpr96_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * This shader is used to clear the uninitiated sgprs after the above
+ * two dispatches, because of hardware feature, dispath 0 couldn't clear
+ * top hole sgprs. Therefore need 4 waves per SIMD to cover these sgprs
+*/
+static const u32 sgpr64_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ const u32 *shader_ptr, u32 shader_size,
+ const struct soc15_reg_entry *init_regs, u32 regs_size,
+ u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
+ struct dma_fence **fence_ptr)
+{
+ int r, i;
+ uint32_t total_size, shader_offset;
+ u64 gpu_addr;
+
+ total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
+ total_size = ALIGN(total_size, 256);
+ shader_offset = total_size;
+ total_size += ALIGN(shader_size, 256);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < shader_size/sizeof(u32); i++)
+ ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
+
+ /* init the ib length to 0 */
+ ib->length_dw = 0;
+
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < regs_size; i++) {
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
+ }
+
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write the wb buffer address */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = pattern;
+
+ /* write dispatch packet */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
+ ib->ptr[ib->length_dw++] = 1; /* y */
+ ib->ptr[ib->length_dw++] = 1; /* z */
+ ib->ptr[ib->length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
+ if (r) {
+ dev_err(adev->dev, "ib submit failed (%d).\n", r);
+ amdgpu_ib_free(adev, ib, NULL);
+ }
+ return r;
+}
+
+static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t offset = 0;
+ char *str;
+ int size;
+
+ str = kmalloc(256, GFP_KERNEL);
+ if (!str)
+ return;
+
+ dev_dbg(adev->dev, "wave assignment:\n");
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
+ for (cu = 0; cu < CU_ID_MAX; cu++) {
+ memset(str, 0, 256);
+ size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
+ for (simd = 0; simd < SIMD_ID_MAX; simd++) {
+ size += sprintf(str + size, "[");
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ size += sprintf(str + size, "%x", wb_ptr[offset]);
+ offset++;
+ }
+ size += sprintf(str + size, "] ");
+ }
+ dev_dbg(adev->dev, "%s\n", str);
+ }
+ }
+
+ kfree(str);
+}
+
+static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
+ uint32_t *wb_ptr, uint32_t mask,
+ uint32_t pattern, uint32_t num_wave, bool wait)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t loop = 0;
+ uint32_t wave_cnt;
+ uint32_t offset;
+
+ do {
+ wave_cnt = 0;
+ offset = 0;
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
+ for (cu = 0; cu < CU_ID_MAX; cu++)
+ for (simd = 0; simd < SIMD_ID_MAX; simd++)
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ if (((1 << wave) & mask) &&
+ (wb_ptr[offset] == pattern))
+ wave_cnt++;
+
+ offset++;
+ }
+
+ if (wave_cnt == num_wave)
+ return 0;
+
+ mdelay(1);
+ } while (++loop < 2000 && wait);
+
+ dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
+ wave_cnt, num_wave);
+
+ gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
+
+ return -EBADSLT;
+}
+
+static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ibs[3];
+ struct dma_fence *fences[3];
+ u32 pattern[3] = { 0x1, 0x5, 0xa };
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready ||
+ !adev->gfx.compute_ring[1].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[0],
+ sgpr112_init_compute_shader_aldebaran,
+ sizeof(sgpr112_init_compute_shader_aldebaran),
+ sgpr112_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr112_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[0], &fences[0]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 224 sgprs\n");
+ goto pro_end;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11,
+ pattern[0],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[1],
+ &disp_ibs[1],
+ sgpr96_init_compute_shader_aldebaran,
+ sizeof(sgpr96_init_compute_shader_aldebaran),
+ sgpr96_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr96_init_regs_aldebaran),
+ adev->gfx.cu_info.number * 2,
+ wb_ib.gpu_addr, pattern[1], &fences[1]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear next 576 sgprs\n");
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11111100,
+ pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp1_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fences[0], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = dma_fence_wait(fences[1], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
+ goto disp1_failed;
+ }
+
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[2],
+ sgpr64_init_compute_shader_aldebaran,
+ sizeof(sgpr64_init_compute_shader_aldebaran),
+ sgpr64_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr64_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[2], &fences[2]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 256 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1111,
+ pattern[2],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp2_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ r = dma_fence_wait(fences[2], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
+ goto disp2_failed;
+ }
+
+disp2_failed:
+ amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ dma_fence_put(fences[2]);
+disp1_failed:
+ amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ dma_fence_put(fences[1]);
+disp0_failed:
+ amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ dma_fence_put(fences[0]);
+pro_end:
+ amdgpu_ib_free(adev, &wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init SGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init SGPRS Successfully\n");
+
+ return r;
+}
+
+static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+ /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ib;
+ struct dma_fence *fence;
+ u32 pattern = 0xa;
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ib,
+ vgpr_init_compute_shader_aldebaran,
+ sizeof(vgpr_init_compute_shader_aldebaran),
+ vgpr_init_regs_aldebaran,
+ ARRAY_SIZE(vgpr_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern, &fence);
+ if (r) {
+ dev_err(adev->dev, "failed to clear vgprs\n");
+ goto pro_end;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fence, false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear vgprs\n");
+ goto disp_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1,
+ pattern,
+ adev->gfx.cu_info.number * SIMD_ID_MAX,
+ false);
+ if (r) {
+ dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
+ goto disp_failed;
+ }
+
+disp_failed:
+ amdgpu_ib_free(adev, &disp_ib, NULL);
+ dma_fence_put(fence);
+pro_end:
+ amdgpu_ib_free(adev, &wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init VGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init VGPRS Successfully\n");
+
+ return r;
+}
+
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ /* Workaround for ALDEBARAN, skip GPRs init in GPU reset.
+ Will remove it once GPRs init algorithm works for all CU settings. */
+ if (amdgpu_in_reset(adev))
+ return 0;
+
+ gfx_v9_4_2_do_sgprs_init(adev);
+
+ gfx_v9_4_2_do_vgprs_init(adev);
+
+ return 0;
+}
+
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
+
+void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t die_id)
+{
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde));
+
+ /* apply golden settings per die */
+ switch (die_id) {
+ case 0:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde_die_0,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde_die_0));
+ break;
+ case 1:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_4_2_alde_die_1,
+ ARRAY_SIZE(golden_settings_gc_9_4_2_alde_die_1));
+ break;
+ default:
+ dev_warn(adev->dev,
+ "invalid die id %d, ignore channel fabricid remap settings\n",
+ die_id);
+ break;
+ }
+
+ return;
+}
+
+void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ for (i = first_vmid; i < last_vmid; i++) {
+ data = 0;
+ soc15_grbm_select(adev, 0, 0, 0, i, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE,
+ 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA1), 0);
+}
+
+void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1);
+ WREG32_SOC15(GC, 0, regGC_THROTTLE_CTRL, tmp);
+
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL1, PWRBRK_STALL_EN, 1);
+ WREG32_SOC15(GC, 0, regGC_THROTTLE_CTRL1, tmp);
+
+ WREG32_SOC15(GC, 0, regGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL);
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, PWRBRK_STALL_PATTERN_CTRL, PWRBRK_END_STEP, 0x12);
+ WREG32_SOC15(GC, 0, regGC_CAC_IND_DATA, tmp);
+}
+
+static const struct soc15_reg_entry gfx_v9_4_2_edc_counter_regs[] = {
+ /* CPF */
+ { SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCPF_EDC_TAG_CNT), 0, 1, 1 },
+ /* CPC */
+ { SOC15_REG_ENTRY(GC, 0, regCPC_EDC_SCRATCH_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCPC_EDC_UCODE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_STATE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT), 0, 1, 1 },
+ /* GDS */
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_GRBM_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT), 0, 1, 1 },
+ /* RLC */
+ { SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2), 0, 1, 1 },
+ /* SPI */
+ { SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT), 0, 8, 1 },
+ /* SQC */
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3), 0, 8, 7 },
+ { SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3), 0, 8, 7 },
+ /* SQ */
+ { SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT), 0, 8, 14 },
+ /* TCP */
+ { SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW), 0, 8, 14 },
+ /* TCI */
+ { SOC15_REG_ENTRY(GC, 0, regTCI_EDC_CNT), 0, 1, 69 },
+ /* TCC */
+ { SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2), 0, 1, 16 },
+ /* TCA */
+ { SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT), 0, 1, 2 },
+ /* TCX */
+ { SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT), 0, 1, 2 },
+ { SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2), 0, 1, 2 },
+ /* TD */
+ { SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT), 0, 8, 14 },
+ /* TA */
+ { SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT), 0, 8, 14 },
+ /* GCEA */
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 1, 16 },
+};
+
+static void gfx_v9_4_2_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static const struct soc15_ras_field_entry gfx_v9_4_2_ras_fields[] = {
+ /* CPF */
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) },
+ { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, regCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) },
+
+ /* CPC */
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, regCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, regCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, regDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_RESTORE_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) },
+ { "CPC_DC_RESTORE_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, regDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) },
+
+ /* GDS */
+ { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_GRBM_CNT),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+ { "GDS_ME0_GFXHP3D_PIX_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_PIX_DED) },
+ { "GDS_ME0_GFXHP3D_VTX_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_VTX_DED) },
+ { "GDS_ME0_CS_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_CS_DED) },
+ { "GDS_ME0_GFXHP3D_GS_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME0_GFXHP3D_GS_DED) },
+ { "GDS_ME1_PIPE0_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE0_DED) },
+ { "GDS_ME1_PIPE1_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE1_DED) },
+ { "GDS_ME1_PIPE2_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE2_DED) },
+ { "GDS_ME1_PIPE3_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME1_PIPE3_DED) },
+ { "GDS_ME2_PIPE0_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE0_DED) },
+ { "GDS_ME2_PIPE1_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE1_DED) },
+ { "GDS_ME2_PIPE2_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE2_DED) },
+ { "GDS_ME2_PIPE3_DED",
+ SOC15_REG_ENTRY(GC, 0, regGDS_EDC_OA_DED), 0, 0,
+ SOC15_REG_FIELD(GDS_EDC_OA_DED, ME2_PIPE3_DED) },
+
+ /* RLC */
+ { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) },
+ { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) },
+ { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) },
+ { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) },
+ { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) },
+ { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) },
+ { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, regRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) },
+
+ /* SPI */
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) },
+ { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) },
+ { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) },
+ { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, regSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT */
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU3_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU3_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU3_UTCL1_LFIFO_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT2 */
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_CNT3 */
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_DED_COUNT) },
+
+ /* SQC - regSQC_EDC_PARITY_CNT3 */
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, regSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_DED_COUNT) },
+
+ /* SQ */
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, regSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+
+ /* TCP */
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, regTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+
+ /* TCI */
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, regTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) },
+
+ /* TCC */
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, regTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) },
+
+ /* TCA */
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, regTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) },
+
+ /* TCX */
+ { "TCX_GROUP0", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP0_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP0_DED_COUNT) },
+ { "TCX_GROUP1", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP1_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP1_DED_COUNT) },
+ { "TCX_GROUP2", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP2_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP2_DED_COUNT) },
+ { "TCX_GROUP3", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP3_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP3_DED_COUNT) },
+ { "TCX_GROUP4", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP4_SEC_COUNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP4_DED_COUNT) },
+ { "TCX_GROUP5", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP5_SED_COUNT), 0, 0 },
+ { "TCX_GROUP6", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP6_SED_COUNT), 0, 0 },
+ { "TCX_GROUP7", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP7_SED_COUNT), 0, 0 },
+ { "TCX_GROUP8", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP8_SED_COUNT), 0, 0 },
+ { "TCX_GROUP9", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP9_SED_COUNT), 0, 0 },
+ { "TCX_GROUP10", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT),
+ SOC15_REG_FIELD(TCX_EDC_CNT, GROUP10_SED_COUNT), 0, 0 },
+ { "TCX_GROUP11", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP11_SED_COUNT), 0, 0 },
+ { "TCX_GROUP12", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP12_SED_COUNT), 0, 0 },
+ { "TCX_GROUP13", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP13_SED_COUNT), 0, 0 },
+ { "TCX_GROUP14", SOC15_REG_ENTRY(GC, 0, regTCX_EDC_CNT2),
+ SOC15_REG_FIELD(TCX_EDC_CNT2, GROUP14_SED_COUNT), 0, 0 },
+
+ /* TD */
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, regTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) },
+
+ /* TA */
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO_LO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_LO_DED_COUNT) },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO_HI", SOC15_REG_ENTRY(GC, 0, regTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_HI_DED_COUNT) },
+
+ /* EA - regGCEA_EDC_CNT */
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 },
+
+ /* EA - regGCEA_EDC_CNT2 */
+ { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) },
+ { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) },
+ { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) },
+ { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) },
+
+ /* EA - regGCEA_EDC_CNT3 */
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) },
+ { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) },
+ { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) },
+ { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) },
+ { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, regGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) },
+};
+
+static const char * const vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_ARADDRS",
+ "UTC_VML2_RDIF_LOG_FIFO",
+ "UTC_VML2_QUEUE_REQ",
+ "UTC_VML2_QUEUE_RET",
+};
+
+static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
+ { VML2_MEM, 8, 2, 2,
+ { SOC15_REG_ENTRY(GC, 0, regVML2_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regVML2_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(VML2_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(VML2_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, VML2_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { VML2_WALKER_MEM, ARRAY_SIZE(vml2_walker_mems), 1, 1,
+ { SOC15_REG_ENTRY(GC, 0, regVML2_WALKER_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regVML2_WALKER_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(VML2_WALKER_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(VML2_WALKER_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, VML2_WALKER_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { UTCL2_MEM, 18, 1, 2,
+ { SOC15_REG_ENTRY(GC, 0, regUTCL2_MEM_ECC_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regUTCL2_MEM_ECC_CNTL) },
+ SOC15_REG_FIELD(UTCL2_MEM_ECC_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(UTCL2_MEM_ECC_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, UTCL2_MEM_ECC_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_2M, 8, 2, 1,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_2M_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_2M_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_2M_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_2M_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_2M_DSM_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_32K, 8, 2, 2,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_32K_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_32K_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_32K_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_32K_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_32K_DSM_CNTL, WRITE_COUNTERS, 1) },
+ { ATC_L2_CACHE_4K, 8, 2, 8,
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_4K_DSM_INDEX) },
+ { SOC15_REG_ENTRY(GC, 0, regATC_L2_CACHE_4K_DSM_CNTL) },
+ SOC15_REG_FIELD(ATC_L2_CACHE_4K_DSM_CNTL, SEC_COUNT),
+ SOC15_REG_FIELD(ATC_L2_CACHE_4K_DSM_CNTL, DED_COUNT),
+ REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
+};
+
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
+
+static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id,
+ uint32_t value, uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_ras_fields); i++) {
+ if (gfx_v9_4_2_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_4_2_ras_fields[i].seg != reg->seg ||
+ gfx_v9_4_2_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = SOC15_RAS_REG_FIELD_VAL(
+ value, gfx_v9_4_2_ras_fields[i], sec);
+ if (sec_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_4_2_ras_fields[i].name, se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = SOC15_RAS_REG_FIELD_VAL(
+ value, gfx_v9_4_2_ras_fields[i], ded);
+ if (ded_cnt) {
+ dev_info(adev->dev,
+ "GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_4_2_ras_fields[i].name, se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_2_query_sram_edc_count(struct amdgpu_device *adev,
+ uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i, j, k, data;
+ uint32_t sec_cnt = 0, ded_cnt = 0;
+
+ if (sec_count && ded_count) {
+ *sec_count = 0;
+ *ded_count = 0;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_2_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_2_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_2_select_se_sh(adev, j, 0, k);
+
+ /* if sec/ded_count is null, just clear counter */
+ if (!sec_count || !ded_count) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]), 0);
+ continue;
+ }
+
+ data = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]));
+
+ if (!data)
+ continue;
+
+ gfx_v9_4_2_get_reg_error_count(adev,
+ &gfx_v9_4_2_edc_counter_regs[i],
+ j, k, data, &sec_cnt, &ded_cnt);
+
+ /* clear counter after read */
+ WREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_edc_counter_regs[i]), 0);
+ }
+ }
+ }
+
+ if (sec_count && ded_count) {
+ *sec_count += sec_cnt;
+ *ded_count += ded_cnt;
+ }
+
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
+ struct gfx_v9_4_2_utc_block *blk,
+ uint32_t instance, uint32_t sec_cnt,
+ uint32_t ded_cnt)
+{
+ uint32_t bank, way, mem;
+ static const char *vml2_way_str[] = { "BIGK", "4K" };
+ static const char *utcl2_rounter_str[] = { "VMC", "APT" };
+
+ mem = instance % blk->num_mem_blocks;
+ way = (instance / blk->num_mem_blocks) % blk->num_ways;
+ bank = instance / (blk->num_mem_blocks * blk->num_ways);
+
+ switch (blk->type) {
+ case VML2_MEM:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_VML2_BANK_CACHE_%d_%s_MEM%d, SED %d, DED %d\n",
+ bank, vml2_way_str[way], mem, sec_cnt, ded_cnt);
+ break;
+ case VML2_WALKER_MEM:
+ dev_info(adev->dev, "GFX SubBlock %s, SED %d, DED %d\n",
+ vml2_walker_mems[bank], sec_cnt, ded_cnt);
+ break;
+ case UTCL2_MEM:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
+ bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_2M:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_2M_BANK%d_WAY%d_MEM, SED %d, DED %d\n",
+ bank, way, sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_32K:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_32K_BANK%d_WAY%d_MEM%d, SED %d, DED %d\n",
+ bank, way, mem, sec_cnt, ded_cnt);
+ break;
+ case ATC_L2_CACHE_4K:
+ dev_info(
+ adev->dev,
+ "GFX SubBlock UTC_ATCL2_CACHE_4K_BANK%d_WAY%d_MEM%d, SED %d, DED %d\n",
+ bank, way, mem, sec_cnt, ded_cnt);
+ break;
+ }
+}
+
+static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev,
+ uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i, j, data;
+ uint32_t sec_cnt, ded_cnt;
+ uint32_t num_instances;
+ struct gfx_v9_4_2_utc_block *blk;
+
+ if (sec_count && ded_count) {
+ *sec_count = 0;
+ *ded_count = 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_2_utc_blocks); i++) {
+ blk = &gfx_v9_4_2_utc_blocks[i];
+ num_instances =
+ blk->num_banks * blk->num_ways * blk->num_mem_blocks;
+ for (j = 0; j < num_instances; j++) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->idx_reg), j);
+
+ /* if sec/ded_count is NULL, just clear counter */
+ if (!sec_count || !ded_count) {
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg),
+ blk->clear);
+ continue;
+ }
+
+ data = RREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg));
+ if (!data)
+ continue;
+
+ sec_cnt = SOC15_RAS_REG_FIELD_VAL(data, *blk, sec);
+ *sec_count += sec_cnt;
+ ded_cnt = SOC15_RAS_REG_FIELD_VAL(data, *blk, ded);
+ *ded_count += ded_cnt;
+
+ /* clear counter after read */
+ WREG32(SOC15_REG_ENTRY_OFFSET(blk->data_reg),
+ blk->clear);
+
+ /* print the edc count */
+ if (sec_cnt || ded_cnt)
+ gfx_v9_4_2_log_utc_edc_count(adev, blk, j, sec_cnt,
+ ded_cnt);
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ gfx_v9_4_2_query_sram_edc_count(adev, &sec_count, &ded_count);
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_4_2_query_utc_edc_count(adev, &sec_count, &ded_count);
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+}
+
+static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+}
+
+static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
+ j++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0, j);
+ value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_ea_err_status_regs));
+ value = REG_SET_FIELD(value, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_query_sram_edc_count(adev, NULL, NULL);
+ gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
+}
+
+static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t reg_value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
+ j++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0, j);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_2_ea_err_status_regs));
+
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
+ j, reg_value);
+ }
+ /* clear after read */
+ reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), reg_value);
+ }
+ }
+
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_query_utc_err_status(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+ }
+}
+
+static void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_query_ea_err_status(adev);
+ gfx_v9_4_2_query_utc_err_status(adev);
+ gfx_v9_4_2_query_sq_timeout_status(adev);
+}
+
+static void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return;
+
+ gfx_v9_4_2_reset_utc_err_status(adev);
+ gfx_v9_4_2_reset_ea_err_status(adev);
+ gfx_v9_4_2_reset_sq_timeout_status(adev);
+}
+
+static void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev)
+{
+ uint32_t i;
+ uint32_t data;
+
+ data = REG_SET_FIELD(0, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+ amdgpu_watchdog_timer.timeout_fatal_disable ? 1 :
+ 0);
+
+ if (amdgpu_watchdog_timer.timeout_fatal_disable &&
+ (amdgpu_watchdog_timer.period < 1 ||
+ amdgpu_watchdog_timer.period > 0x23)) {
+ dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n");
+ amdgpu_watchdog_timer.period = 0x23;
+ }
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL,
+ amdgpu_watchdog_timer.period);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ gfx_v9_4_2_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_CONFIG, data);
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC_EX(reg, GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v9_4_2_log_cu_timeout_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t i, simd, wave;
+ uint32_t wave_status;
+ uint32_t wave_pc_lo, wave_pc_hi;
+ uint32_t wave_exec_lo, wave_exec_hi;
+ uint32_t wave_inst_dw0, wave_inst_dw1;
+ uint32_t wave_ib_sts;
+
+ for (i = 0; i < 32; i++) {
+ if (!((i << 1) & status))
+ continue;
+
+ simd = i / cu_info->max_waves_per_simd;
+ wave = i % cu_info->max_waves_per_simd;
+
+ wave_status = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+ wave_pc_lo = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+ wave_pc_hi = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+ wave_exec_lo =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+ wave_exec_hi =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+ wave_inst_dw0 =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+ wave_inst_dw1 =
+ wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+ wave_ib_sts = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+
+ dev_info(
+ adev->dev,
+ "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
+ simd, wave, wave_status,
+ ((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
+ ((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
+ ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
+ wave_ib_sts);
+ }
+}
+
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+ uint32_t status;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines;
+ se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se;
+ sh_idx++) {
+ for (cu_idx = 0;
+ cu_idx < adev->gfx.config.max_cu_per_sh;
+ cu_idx++) {
+ gfx_v9_4_2_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx);
+ status = RREG32_SOC15(GC, 0,
+ regSQ_TIMEOUT_STATUS);
+ if (status != 0) {
+ dev_info(
+ adev->dev,
+ "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
+ se_idx, sh_idx, cu_idx);
+ gfx_v9_4_2_log_cu_timeout_status(
+ adev, status);
+ }
+ /* clear old status */
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines;
+ se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se;
+ sh_idx++) {
+ for (cu_idx = 0;
+ cu_idx < adev->gfx.config.max_cu_per_sh;
+ cu_idx++) {
+ gfx_v9_4_2_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx);
+ WREG32_SOC15(GC, 0, regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
+{
+ u32 status = 0;
+ struct amdgpu_vmhub *hub;
+
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ /* reset page fault status */
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+}
+
+struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
+ .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_2_ras_ops,
+ },
+ .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
+ .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
new file mode 100644
index 000000000..7584624b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_2_H__
+#define __GFX_V9_4_2_H__
+
+void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid, uint32_t last_vmid);
+void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t die_id);
+void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
+
+extern struct amdgpu_gfx_ras gfx_v9_4_2_ras;
+
+#endif /* __GFX_V9_4_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
new file mode 100644
index 000000000..e481ef73a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -0,0 +1,4421 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "vega10_enum.h"
+
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+
+#include "gfx_v9_4_3.h"
+#include "amdgpu_xcp.h"
+
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+
+#define GFX9_MEC_HPD_SIZE 4096
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras;
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+
+static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_4_3_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
+}
+
+static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
+{
+ int i, num_xcc, dev_inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ dev_inst = GET_INST(GC, i);
+
+ WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
+ GOLDEN_GB_ADDR_CONFIG);
+ /* Golden settings applied by driver for ASIC with rev_id 0 */
+ if (adev->rev_id == 0) {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
+ REDUCE_FIFO_DEPTH_BY_2, 2);
+ } else {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
+ SPARE, 0x1);
+ }
+ }
+}
+
+static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) |
+ (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
+{
+ uint32_t scratch_reg0_offset, xcc_offset;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* Use register offset which is local to XCC in the packet */
+ xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+ WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch_reg0_offset);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err1;
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/* This value might differs per partition */
+static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+
+ return clock;
+}
+
+static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+ return err;
+}
+
+static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
+{
+ return true;
+}
+
+static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
+{
+ if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+}
+
+static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
+ const char *chip_name)
+{
+ char fw_name[30];
+ int err;
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+ adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+
+ gfx_v9_4_3_check_if_need_gfxoff(adev);
+
+out:
+ if (err)
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ return err;
+}
+
+static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int r;
+
+ chip_name = "gc_9_4_3";
+
+ r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+ u32 *hpd;
+ const __le32 *fw_data;
+ unsigned fw_size;
+ u32 *fw;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
+ AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size =
+ adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ for (i = 0; i < mec_hpd_size / 4; i++) {
+ memset((void *)(hpd + i), 0, 4);
+ if (i % 50 == 0)
+ msleep(1);
+ }
+ } else {
+ memset(hpd, 0, mec_hpd_size);
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+ gfx_v9_4_3_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_INDEX, instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK));
+ return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t regno, uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+ (SQ_IND_INDEX__FORCE_READ_MASK) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+}
+
+static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* type 1 wave data */
+ dst[(*no_fields)++] = 1;
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, 0,
+ start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(adev, xcc_id, simd, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+}
+
+
+static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
+ int num_xccs_per_xcp)
+{
+ int ret, i, num_xcc;
+ u32 tmp = 0, regval;
+
+ if (adev->psp.funcs) {
+ ret = psp_spatial_partition(&adev->psp,
+ NUM_XCC(adev->gfx.xcc_mask) /
+ num_xccs_per_xcp);
+ if (ret)
+ return ret;
+ }
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ for (i = 0; i < num_xcc; i++) {
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
+ num_xccs_per_xcp);
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
+ i % num_xccs_per_xcp);
+ regval = RREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL);
+ if (regval != tmp)
+ WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL,
+ tmp);
+ }
+
+ adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+ return 0;
+}
+
+static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+{
+ int xcc;
+
+ xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0));
+ if (!xcc) {
+ dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
+ return -EINVAL;
+ }
+
+ return xcc - 1;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
+ .read_wave_data = &gfx_v9_4_3_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
+ .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
+ .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+};
+
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+ adev->gfx.ras = &gfx_v9_4_3_ras;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_BANKS);
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(
+ adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
+static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int xcc_id, int mec, int pipe, int queue)
+{
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+ unsigned int hw_prio;
+ uint32_t xcc_doorbell_start;
+
+ ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+ ring_id];
+
+ /* mec0 is me1 */
+ ring->xcc_id = xcc_id;
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range;
+ ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+ (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+ GFX9_MEC_HPD_SIZE;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ sprintf(ring->name, "comp_%d.%d.%d.%d",
+ ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v9_4_3_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id, xcc_id, num_xcc;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v9_4_3_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
+ k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(
+ adev, xcc_id, i, k, j))
+ continue;
+
+ r = gfx_v9_4_3_compute_ring_init(adev,
+ ring_id,
+ xcc_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq[xcc_id];
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id);
+ if (r)
+ return r;
+
+ /* create MQD for all compute queues as wel as KIQ for SRIOV case */
+ r = amdgpu_gfx_mqd_sw_init(adev,
+ sizeof(struct v9_mqd_allocation), xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_4_3_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_ras_sw_init(adev);
+ if (r)
+ return r;
+
+
+ if (!amdgpu_sriov_vf(adev))
+ r = amdgpu_gfx_sysfs_init(adev);
+
+ return r;
+}
+
+static int gfx_v9_4_3_sw_fini(void *handle)
+{
+ int i, num_xcc;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ for (i = 0; i < num_xcc; i++) {
+ amdgpu_gfx_mqd_sw_fini(adev, i);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+ amdgpu_gfx_kiq_fini(adev, i);
+ }
+
+ gfx_v9_4_3_mec_fini(adev);
+ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+ gfx_v9_4_3_free_microcode(adev);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_gfx_sysfs_fini(adev);
+
+ return 0;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 tmp;
+ int i;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+ !!adev->gmc.noretry);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, tmp);
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >>
+ 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >>
+ 48));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_BASES, tmp);
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 =
+ RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+}
+
+static void
+gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
+{
+ /*
+ * Rlc save restore list is workable since v2_1.
+ * And it's needed by gfxoff feature.
+ */
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
+}
+
+static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_setting;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_setting = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
+ if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
+ return false;
+
+ return true;
+}
+
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
+}
+
+static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ int xcc_id, num_xcc;
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT);
+ }
+}
+
+static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
+{
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 i, j, k;
+ u32 mask;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff,
+ xcc_id);
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0)
+ break;
+ udelay(1);
+ }
+ if (k == adev->usec_timeout) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff,
+ 0xffffffff,
+ 0xffffffff, xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ i, j);
+ return;
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
+ RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
+ for (k = 0; k < adev->usec_timeout; k++) {
+ if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ u32 tmp;
+
+ /* These interrupts should be enabled to drive DS clock */
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 0);
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_stop(adev, i);
+}
+
+static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET,
+ SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_rlc_reset(adev, i);
+}
+
+static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL,
+ RLC_ENABLE_F32, 1);
+ udelay(50);
+
+ /* carrizo do enable cp interrupt after cp inited */
+ if (!(adev->flags & AMD_IS_APU)) {
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+ udelay(50);
+ }
+}
+
+static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
+{
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ u32 rlc_ucode_ver;
+#endif
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_rlc_start(adev, i);
+#ifdef AMDGPU_RLC_DEBUG_RETRY
+ /* RLC_GPM_GENERAL_6 : RLC Ucode version */
+ rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6);
+ if (rlc_ucode_ver == 0x108) {
+ dev_info(adev->dev,
+ "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ rlc_ucode_ver, adev->gfx.rlc_fw_version);
+ /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
+ * default is 0x9C4 to create a 100us interval */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4);
+ /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
+ * to disable the page fault retry interrupts, default is
+ * 0x100 (256) */
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100);
+ }
+#endif
+ }
+}
+
+static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+ for (i = 0; i < fw_size; i++) {
+ if (amdgpu_emu_mode == 1 && i % 100 == 0) {
+ dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
+ msleep(1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ int r;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id);
+ /* legacy rlc firmware loading */
+ r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ gfx_v9_4_3_xcc_rlc_start(adev, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ /* disable CG */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+ gfx_v9_4_3_xcc_init_pg(adev, xcc_id);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
+{
+ int r, i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
+ unsigned vmid)
+{
+ u32 reg, data;
+
+ reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+}
+
+static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
+ {SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
+ {SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
+};
+
+static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i, inst;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ inst = adev->ip_map.logical_to_dev_inst ?
+ adev->ip_map.logical_to_dev_inst(
+ adev, entry->hwip, entry->instance) :
+ entry->instance;
+ reg = adev->reg_offset[entry->hwip][inst][entry->segment] +
+ entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v9_4_3_check_rlcg_range(adev, offset,
+ (void *)rlcg_access_gc_9_4_3,
+ ARRAY_SIZE(rlcg_access_gc_9_4_3));
+}
+
+static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ if (enable) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
+ } else {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ adev->gfx.kiq[xcc_id].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+ u32 mec_ucode_addr_offset;
+ u32 mec_ucode_data_offset;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+ adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ mec_ucode_addr_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR);
+ mec_ucode_data_offset =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA);
+
+ /* MEC1 */
+ WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset);
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32(mec_ucode_data_offset,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version);
+ /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
+
+ return 0;
+}
+
+/* KIQ functions */
+static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+}
+
+static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+ }
+}
+
+static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ mqd->dynamic_cu_mask_addr_lo =
+ lower_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+ mqd->dynamic_cu_mask_addr_hi =
+ upper_32_bits(ring->mqd_gpu_addr
+ + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a queue/ring */
+ gfx_v9_4_3_mqd_set_priority(ring, mqd);
+ mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM);
+
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_LOWER,
+ ((adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ WREG32_SOC15(
+ GC, GET_INST(GC, xcc_id),
+ regCP_MEC_DOORBELL_RANGE_UPPER,
+ ((adev->doorbell_index.userqueue_end +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+ 2) << 2);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int j;
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+
+ if (j == AMDGPU_MAX_USEC_TIMEOUT) {
+ DRM_DEBUG("%s dequeue request failed.\n", ring->name);
+
+ /* Manual disable if dequeue request times out */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ 0);
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, CP_HQD_PERSISTENT_STATE_DEFAULT);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ struct v9_mqd *tmp_mqd;
+
+ gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id);
+
+ /* GPU could be in bad state during probe, driver trigger the reset
+ * after load the SMU, in this case , the mqd is not be initialized.
+ * driver need to re-init the mqd.
+ * check mqd->cp_hqd_pq_control since this value should not be 0
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup;
+ if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
+ /* for GPU_RESET case , reset MQD to a clean status */
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[xcc_id].mqd_backup)
+ memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v9_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+ struct v9_mqd *tmp_mqd;
+
+ /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
+ * is not be initialized before
+ */
+ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
+
+ if (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
+ ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
+ ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_mqd_init(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int j;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring[j + xcc_id * adev->gfx.num_compute_rings];
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me,
+ ring->pipe,
+ ring->queue, 0, GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq[xcc_id].ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ return 0;
+}
+
+static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev, xcc_id);
+done:
+ return r;
+}
+
+static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r, j;
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id);
+
+ r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++) {
+ ring = &adev->gfx.compute_ring
+ [j + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+
+ return 0;
+}
+
+static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
+{
+ int r = 0, i, num_xcc;
+
+ if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
+ amdgpu_user_partt_mode);
+
+ if (r)
+ return r;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable,
+ int xcc_id)
+{
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ if (amdgpu_gfx_disable_kcq(adev, xcc_id))
+ DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id);
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* must disable polling for SRIOV when hw finished, otherwise
+ * CPC engine may still keep fetching WB address which is already
+ * invalid after sw finished and trigger DMAR reading error in
+ * hypervisor side.
+ */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+ return;
+ }
+
+ /* Use deinitialize sequence from CAIL when unbinding device
+ * from driver, otherwise KIQ is hanging when binding back
+ */
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me,
+ adev->gfx.kiq[xcc_id].ring.pipe,
+ adev->gfx.kiq[xcc_id].ring.queue, 0,
+ GET_INST(GC, xcc_id));
+ gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring,
+ xcc_id);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
+ gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+}
+
+static int gfx_v9_4_3_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfx_v9_4_3_init_golden_registers(adev);
+
+ gfx_v9_4_3_constants_init(adev);
+
+ r = adev->gfx.rlc.funcs->resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v9_4_3_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int gfx_v9_4_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ gfx_v9_4_3_xcc_fini(adev, i);
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_suspend(void *handle)
+{
+ return gfx_v9_4_3_hw_fini(handle);
+}
+
+static int gfx_v9_4_3_resume(void *handle)
+{
+ return gfx_v9_4_3_hw_init(handle);
+}
+
+static bool gfx_v9_4_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ }
+ return true;
+}
+
+static int gfx_v9_4_3_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v9_4_3_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v9_4_3_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
+ GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
+ GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
+ GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v9_4_3_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v9_4_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ gfx_v9_4_3_set_kiq_pm4_funcs(adev);
+ gfx_v9_4_3_set_ring_funcs(adev);
+ gfx_v9_4_3_set_irq_funcs(adev);
+ gfx_v9_4_3_set_gds_init(adev);
+ gfx_v9_4_3_set_rlc_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v9_4_3_init_microcode(adev);
+}
+
+static int gfx_v9_4_3_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ if (adev->gfx.ras &&
+ adev->gfx.ras->enable_watchdog_timer)
+ adev->gfx.ras->enable_watchdog_timer(adev);
+
+ return 0;
+}
+
+static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+
+}
+
+static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void
+gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data);
+ }
+ }
+
+}
+
+static void
+gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ data = (0x36
+ << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+ }
+
+}
+
+static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+
+ if (enable) {
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === CGCG + CGLS === */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable,
+ xcc_id);
+ /* === MGCG + MGLS === */
+ gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable,
+ xcc_id);
+
+ /* FGCG */
+ gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id);
+ gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+ return 0;
+}
+
+static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
+ .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
+ .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode,
+ .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode,
+ .init = gfx_v9_4_3_rlc_init,
+ .resume = gfx_v9_4_3_rlc_resume,
+ .stop = gfx_v9_4_3_rlc_stop,
+ .reset = gfx_v9_4_3_rlc_reset,
+ .start = gfx_v9_4_3_rlc_start,
+ .update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
+};
+
+static int gfx_v9_4_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int gfx_v9_4_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, num_xcc;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, i);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+}
+
+static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ seq, 0xffffffff, 4);
+}
+
+static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
+}
+
+static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx9 now */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ struct amdgpu_device *adev, int me, int pipe,
+ enum amdgpu_interrupt_state state, int xcc_id)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ int i, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ switch (type) {
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 1, 3, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 0, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 1, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 2, state, i);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
+ adev, 2, 3, state, i);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i, xcc_id;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return -EINVAL;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v9_4_3_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i, xcc_id;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id);
+
+ if (xcc_id == -EINVAL)
+ return;
+
+ switch (me_id) {
+ case 0:
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring
+ [i +
+ xcc_id * adev->gfx.num_compute_rings];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ }
+}
+
+static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
+static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring,
+ uint32_t pipe, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ uint32_t wcl_cs_reg;
+
+ /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
+ val = enable ? 0x1 : 0x7f;
+
+ switch (pipe) {
+ case 0:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0);
+ break;
+ case 1:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1);
+ break;
+ case 2:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2);
+ break;
+ case 3:
+ wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+
+ amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+
+}
+static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+ int i;
+
+ /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : 0x07ffffff;
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX),
+ val);
+
+ /* Restrict waves for normal/low priority compute queues as well
+ * to get best QoS for high priority compute jobs.
+ *
+ * amdgpu controls only 1st ME(0-3 CS pipes).
+ */
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ if (i != ring->pipe)
+ gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable);
+
+ }
+}
+
+enum amdgpu_gfx_cp_ras_mem_id {
+ AMDGPU_GFX_CP_MEM1 = 1,
+ AMDGPU_GFX_CP_MEM2,
+ AMDGPU_GFX_CP_MEM3,
+ AMDGPU_GFX_CP_MEM4,
+ AMDGPU_GFX_CP_MEM5,
+};
+
+enum amdgpu_gfx_gcea_ras_mem_id {
+ AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4,
+ AMDGPU_GFX_GCEA_IORD_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIWR_CMDMEM,
+ AMDGPU_GFX_GCEA_GMIRD_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMWR_CMDMEM,
+ AMDGPU_GFX_GCEA_DRAMRD_CMDMEM,
+ AMDGPU_GFX_GCEA_MAM_DMEM0,
+ AMDGPU_GFX_GCEA_MAM_DMEM1,
+ AMDGPU_GFX_GCEA_MAM_DMEM2,
+ AMDGPU_GFX_GCEA_MAM_DMEM3,
+ AMDGPU_GFX_GCEA_MAM_AMEM0,
+ AMDGPU_GFX_GCEA_MAM_AMEM1,
+ AMDGPU_GFX_GCEA_MAM_AMEM2,
+ AMDGPU_GFX_GCEA_MAM_AMEM3,
+ AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER,
+ AMDGPU_GFX_GCEA_WRET_TAGMEM,
+ AMDGPU_GFX_GCEA_RRET_TAGMEM,
+ AMDGPU_GFX_GCEA_IOWR_DATAMEM,
+ AMDGPU_GFX_GCEA_GMIWR_DATAMEM,
+ AMDGPU_GFX_GCEA_DRAM_DATAMEM,
+};
+
+enum amdgpu_gfx_gc_cane_ras_mem_id {
+ AMDGPU_GFX_GC_CANE_MEM0 = 0,
+};
+
+enum amdgpu_gfx_gcutcl2_ras_mem_id {
+ AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160,
+};
+
+enum amdgpu_gfx_gds_ras_mem_id {
+ AMDGPU_GFX_GDS_MEM0 = 0,
+};
+
+enum amdgpu_gfx_lds_ras_mem_id {
+ AMDGPU_GFX_LDS_BANK0 = 0,
+ AMDGPU_GFX_LDS_BANK1,
+ AMDGPU_GFX_LDS_BANK2,
+ AMDGPU_GFX_LDS_BANK3,
+ AMDGPU_GFX_LDS_BANK4,
+ AMDGPU_GFX_LDS_BANK5,
+ AMDGPU_GFX_LDS_BANK6,
+ AMDGPU_GFX_LDS_BANK7,
+ AMDGPU_GFX_LDS_BANK8,
+ AMDGPU_GFX_LDS_BANK9,
+ AMDGPU_GFX_LDS_BANK10,
+ AMDGPU_GFX_LDS_BANK11,
+ AMDGPU_GFX_LDS_BANK12,
+ AMDGPU_GFX_LDS_BANK13,
+ AMDGPU_GFX_LDS_BANK14,
+ AMDGPU_GFX_LDS_BANK15,
+ AMDGPU_GFX_LDS_BANK16,
+ AMDGPU_GFX_LDS_BANK17,
+ AMDGPU_GFX_LDS_BANK18,
+ AMDGPU_GFX_LDS_BANK19,
+ AMDGPU_GFX_LDS_BANK20,
+ AMDGPU_GFX_LDS_BANK21,
+ AMDGPU_GFX_LDS_BANK22,
+ AMDGPU_GFX_LDS_BANK23,
+ AMDGPU_GFX_LDS_BANK24,
+ AMDGPU_GFX_LDS_BANK25,
+ AMDGPU_GFX_LDS_BANK26,
+ AMDGPU_GFX_LDS_BANK27,
+ AMDGPU_GFX_LDS_BANK28,
+ AMDGPU_GFX_LDS_BANK29,
+ AMDGPU_GFX_LDS_BANK30,
+ AMDGPU_GFX_LDS_BANK31,
+ AMDGPU_GFX_LDS_SP_BUFFER_A,
+ AMDGPU_GFX_LDS_SP_BUFFER_B,
+};
+
+enum amdgpu_gfx_rlc_ras_mem_id {
+ AMDGPU_GFX_RLC_GPMF32 = 1,
+ AMDGPU_GFX_RLC_RLCVF32,
+ AMDGPU_GFX_RLC_SCRATCH,
+ AMDGPU_GFX_RLC_SRM_ARAM,
+ AMDGPU_GFX_RLC_SRM_DRAM,
+ AMDGPU_GFX_RLC_TCTAG,
+ AMDGPU_GFX_RLC_SPM_SE,
+ AMDGPU_GFX_RLC_SPM_GRBMT,
+};
+
+enum amdgpu_gfx_sp_ras_mem_id {
+ AMDGPU_GFX_SP_SIMDID0 = 0,
+};
+
+enum amdgpu_gfx_spi_ras_mem_id {
+ AMDGPU_GFX_SPI_MEM0 = 0,
+ AMDGPU_GFX_SPI_MEM1,
+ AMDGPU_GFX_SPI_MEM2,
+ AMDGPU_GFX_SPI_MEM3,
+};
+
+enum amdgpu_gfx_sqc_ras_mem_id {
+ AMDGPU_GFX_SQC_INST_CACHE_A = 100,
+ AMDGPU_GFX_SQC_INST_CACHE_B = 101,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102,
+ AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104,
+ AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106,
+ AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107,
+ AMDGPU_GFX_SQC_DATA_CACHE_A = 200,
+ AMDGPU_GFX_SQC_DATA_CACHE_B = 201,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202,
+ AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204,
+ AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206,
+ AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207,
+ AMDGPU_GFX_SQC_DIRTY_BIT_A = 208,
+ AMDGPU_GFX_SQC_DIRTY_BIT_B = 209,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210,
+ AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213,
+ AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108,
+};
+
+enum amdgpu_gfx_sq_ras_mem_id {
+ AMDGPU_GFX_SQ_SGPR_MEM0 = 0,
+ AMDGPU_GFX_SQ_SGPR_MEM1,
+ AMDGPU_GFX_SQ_SGPR_MEM2,
+ AMDGPU_GFX_SQ_SGPR_MEM3,
+};
+
+enum amdgpu_gfx_ta_ras_mem_id {
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1,
+ AMDGPU_GFX_TA_FS_AFIFO_RAM_HI,
+ AMDGPU_GFX_TA_FS_CFIFO_RAM,
+ AMDGPU_GFX_TA_FSX_LFIFO,
+ AMDGPU_GFX_TA_FS_DFIFO_RAM,
+};
+
+enum amdgpu_gfx_tcc_ras_mem_id {
+ AMDGPU_GFX_TCC_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tca_ras_mem_id {
+ AMDGPU_GFX_TCA_MEM1 = 1,
+};
+
+enum amdgpu_gfx_tci_ras_mem_id {
+ AMDGPU_GFX_TCIW_MEM = 1,
+};
+
+enum amdgpu_gfx_tcp_ras_mem_id {
+ AMDGPU_GFX_TCP_LFIFO0 = 1,
+ AMDGPU_GFX_TCP_SET0BANK0_RAM,
+ AMDGPU_GFX_TCP_SET0BANK1_RAM,
+ AMDGPU_GFX_TCP_SET0BANK2_RAM,
+ AMDGPU_GFX_TCP_SET0BANK3_RAM,
+ AMDGPU_GFX_TCP_SET1BANK0_RAM,
+ AMDGPU_GFX_TCP_SET1BANK1_RAM,
+ AMDGPU_GFX_TCP_SET1BANK2_RAM,
+ AMDGPU_GFX_TCP_SET1BANK3_RAM,
+ AMDGPU_GFX_TCP_SET2BANK0_RAM,
+ AMDGPU_GFX_TCP_SET2BANK1_RAM,
+ AMDGPU_GFX_TCP_SET2BANK2_RAM,
+ AMDGPU_GFX_TCP_SET2BANK3_RAM,
+ AMDGPU_GFX_TCP_SET3BANK0_RAM,
+ AMDGPU_GFX_TCP_SET3BANK1_RAM,
+ AMDGPU_GFX_TCP_SET3BANK2_RAM,
+ AMDGPU_GFX_TCP_SET3BANK3_RAM,
+ AMDGPU_GFX_TCP_VM_FIFO,
+ AMDGPU_GFX_TCP_DB_TAGRAM0,
+ AMDGPU_GFX_TCP_DB_TAGRAM1,
+ AMDGPU_GFX_TCP_DB_TAGRAM2,
+ AMDGPU_GFX_TCP_DB_TAGRAM3,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0,
+ AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1,
+ AMDGPU_GFX_TCP_CMD_FIFO,
+};
+
+enum amdgpu_gfx_td_ras_mem_id {
+ AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM,
+ AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM,
+};
+
+enum amdgpu_gfx_tcx_ras_mem_id {
+ AMDGPU_GFX_TCX_FIFOD0 = 0,
+ AMDGPU_GFX_TCX_FIFOD1,
+ AMDGPU_GFX_TCX_FIFOD2,
+ AMDGPU_GFX_TCX_FIFOD3,
+ AMDGPU_GFX_TCX_FIFOD4,
+ AMDGPU_GFX_TCX_FIFOD5,
+ AMDGPU_GFX_TCX_FIFOD6,
+ AMDGPU_GFX_TCX_FIFOD7,
+ AMDGPU_GFX_TCX_FIFOB0,
+ AMDGPU_GFX_TCX_FIFOB1,
+ AMDGPU_GFX_TCX_FIFOB2,
+ AMDGPU_GFX_TCX_FIFOB3,
+ AMDGPU_GFX_TCX_FIFOB4,
+ AMDGPU_GFX_TCX_FIFOB5,
+ AMDGPU_GFX_TCX_FIFOB6,
+ AMDGPU_GFX_TCX_FIFOB7,
+ AMDGPU_GFX_TCX_FIFOA0,
+ AMDGPU_GFX_TCX_FIFOA1,
+ AMDGPU_GFX_TCX_FIFOA2,
+ AMDGPU_GFX_TCX_FIFOA3,
+ AMDGPU_GFX_TCX_FIFOA4,
+ AMDGPU_GFX_TCX_FIFOA5,
+ AMDGPU_GFX_TCX_FIFOA6,
+ AMDGPU_GFX_TCX_FIFOA7,
+ AMDGPU_GFX_TCX_CFIFO0,
+ AMDGPU_GFX_TCX_CFIFO1,
+ AMDGPU_GFX_TCX_CFIFO2,
+ AMDGPU_GFX_TCX_CFIFO3,
+ AMDGPU_GFX_TCX_CFIFO4,
+ AMDGPU_GFX_TCX_CFIFO5,
+ AMDGPU_GFX_TCX_CFIFO6,
+ AMDGPU_GFX_TCX_CFIFO7,
+ AMDGPU_GFX_TCX_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_FIFO_ACKD7,
+ AMDGPU_GFX_TCX_DST_FIFOA0,
+ AMDGPU_GFX_TCX_DST_FIFOA1,
+ AMDGPU_GFX_TCX_DST_FIFOA2,
+ AMDGPU_GFX_TCX_DST_FIFOA3,
+ AMDGPU_GFX_TCX_DST_FIFOA4,
+ AMDGPU_GFX_TCX_DST_FIFOA5,
+ AMDGPU_GFX_TCX_DST_FIFOA6,
+ AMDGPU_GFX_TCX_DST_FIFOA7,
+ AMDGPU_GFX_TCX_DST_FIFOB0,
+ AMDGPU_GFX_TCX_DST_FIFOB1,
+ AMDGPU_GFX_TCX_DST_FIFOB2,
+ AMDGPU_GFX_TCX_DST_FIFOB3,
+ AMDGPU_GFX_TCX_DST_FIFOB4,
+ AMDGPU_GFX_TCX_DST_FIFOB5,
+ AMDGPU_GFX_TCX_DST_FIFOB6,
+ AMDGPU_GFX_TCX_DST_FIFOB7,
+ AMDGPU_GFX_TCX_DST_FIFOD0,
+ AMDGPU_GFX_TCX_DST_FIFOD1,
+ AMDGPU_GFX_TCX_DST_FIFOD2,
+ AMDGPU_GFX_TCX_DST_FIFOD3,
+ AMDGPU_GFX_TCX_DST_FIFOD4,
+ AMDGPU_GFX_TCX_DST_FIFOD5,
+ AMDGPU_GFX_TCX_DST_FIFOD6,
+ AMDGPU_GFX_TCX_DST_FIFOD7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKB7,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD0,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD1,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD2,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD3,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD4,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD5,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD6,
+ AMDGPU_GFX_TCX_DST_FIFO_ACKD7,
+};
+
+enum amdgpu_gfx_atc_l2_ras_mem_id {
+ AMDGPU_GFX_ATC_L2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_utcl2_ras_mem_id {
+ AMDGPU_GFX_UTCL2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_ras_mem_id {
+ AMDGPU_GFX_VML2_MEM0 = 0,
+};
+
+enum amdgpu_gfx_vml2_walker_ras_mem_id {
+ AMDGPU_GFX_VML2_WALKER_MEM0 = 0,
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = {
+ {AMDGPU_GFX_CP_MEM1, "CP_MEM1"},
+ {AMDGPU_GFX_CP_MEM2, "CP_MEM2"},
+ {AMDGPU_GFX_CP_MEM3, "CP_MEM3"},
+ {AMDGPU_GFX_CP_MEM4, "CP_MEM4"},
+ {AMDGPU_GFX_CP_MEM5, "CP_MEM5"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = {
+ {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"},
+ {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"},
+ {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"},
+ {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"},
+ {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"},
+ {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"},
+ {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = {
+ {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = {
+ {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = {
+ {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = {
+ {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"},
+ {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"},
+ {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"},
+ {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"},
+ {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"},
+ {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"},
+ {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"},
+ {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"},
+ {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"},
+ {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"},
+ {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"},
+ {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"},
+ {AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"},
+ {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"},
+ {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"},
+ {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"},
+ {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"},
+ {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"},
+ {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"},
+ {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"},
+ {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"},
+ {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"},
+ {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"},
+ {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"},
+ {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"},
+ {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"},
+ {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"},
+ {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"},
+ {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"},
+ {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"},
+ {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"},
+ {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"},
+ {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = {
+ {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"},
+ {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"},
+ {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"},
+ {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"},
+ {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"},
+ {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"},
+ {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"},
+ {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = {
+ {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = {
+ {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"},
+ {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"},
+ {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"},
+ {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = {
+ {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"},
+ {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"},
+ {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"},
+ {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"},
+ {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = {
+ {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"},
+ {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"},
+ {AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"},
+ {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = {
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"},
+ {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"},
+ {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"},
+ {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"},
+ {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = {
+ {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = {
+ {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = {
+ {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = {
+ {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"},
+ {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"},
+ {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"},
+ {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"},
+ {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"},
+ {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"},
+ {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = {
+ {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"},
+ {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = {
+ {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"},
+ {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"},
+ {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"},
+ {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"},
+ {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"},
+ {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"},
+ {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"},
+ {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"},
+ {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"},
+ {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"},
+ {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"},
+ {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"},
+ {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"},
+ {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"},
+ {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"},
+ {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"},
+ {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"},
+ {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"},
+ {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"},
+ {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"},
+ {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"},
+ {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"},
+ {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"},
+ {AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"},
+ {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"},
+ {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"},
+ {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"},
+ {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"},
+ {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"},
+ {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"},
+ {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"},
+ {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"},
+ {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"},
+ {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"},
+ {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"},
+ {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"},
+ {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"},
+ {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"},
+ {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"},
+ {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"},
+ {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"},
+ {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"},
+ {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"},
+ {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"},
+ {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"},
+ {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"},
+ {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"},
+ {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"},
+ {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"},
+ {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"},
+ {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"},
+ {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"},
+ {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"},
+ {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"},
+ {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"},
+ {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"},
+ {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = {
+ {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = {
+ {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_mem_list[] = {
+ {AMDGPU_GFX_VML2_MEM, "VML2_MEM"},
+};
+
+static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = {
+ {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"},
+};
+
+static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = {
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list)
+ AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list)
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG),
+ 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 1},
+};
+
+static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"},
+ AMDGPU_GFX_RLC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"},
+ AMDGPU_GFX_CP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"},
+ AMDGPU_GFX_GDS_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"},
+ AMDGPU_GFX_GC_CANE_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
+ AMDGPU_GFX_SPI_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
+ AMDGPU_GFX_SP_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
+ AMDGPU_GFX_SQ_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
+ 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
+ AMDGPU_GFX_SQC_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
+ AMDGPU_GFX_TCX_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"},
+ AMDGPU_GFX_TCC_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
+ AMDGPU_GFX_TA_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG),
+ 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ AMDGPU_GFX_TCI_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
+ AMDGPU_GFX_TCP_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
+ AMDGPU_GFX_TD_MEM, 8},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI),
+ 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
+ AMDGPU_GFX_TCA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI),
+ 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
+ AMDGPU_GFX_GCEA_MEM, 1},
+ {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI),
+ 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
+ AMDGPU_GFX_LDS_MEM, 1},
+};
+
+static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
+
+static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ unsigned long ce_count = 0, ue_count = 0;
+ uint32_t i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &ce_count);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ /* the caller should make sure initialize value of
+ * err_data->ue_count and err_data->ce_count
+ */
+ err_data->ce_count += ce_count;
+ err_data->ue_count += ue_count;
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ uint32_t i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ce_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
+ }
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t i, j;
+ uint32_t reg_value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
+ reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGCEA_ERR_STATUS);
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "GCEA err detected at instance: %d, status: 0x%x!\n",
+ j, reg_value);
+ }
+ /* clear after read */
+ reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS,
+ reg_value);
+ }
+ }
+
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
+ }
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regVML2_WALKER_MEM_ECC_STATUS);
+ if (data) {
+ dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
+ 0x3);
+ }
+}
+
+static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
+ uint32_t status, int xcc_id)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t i, simd, wave;
+ uint32_t wave_status;
+ uint32_t wave_pc_lo, wave_pc_hi;
+ uint32_t wave_exec_lo, wave_exec_hi;
+ uint32_t wave_inst_dw0, wave_inst_dw1;
+ uint32_t wave_ib_sts;
+
+ for (i = 0; i < 32; i++) {
+ if (!((i << 1) & status))
+ continue;
+
+ simd = i / cu_info->max_waves_per_simd;
+ wave = i % cu_info->max_waves_per_simd;
+
+ wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+ wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+ wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+ wave_exec_lo =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+ wave_exec_hi =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+ wave_inst_dw0 =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+ wave_inst_dw1 =
+ wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+ wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+
+ dev_info(
+ adev->dev,
+ "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
+ simd, wave, wave_status,
+ ((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
+ ((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
+ ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
+ wave_ib_sts);
+ }
+}
+
+static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+ uint32_t status;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
+ for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx, xcc_id);
+ status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS);
+ if (status != 0) {
+ dev_info(
+ adev->dev,
+ "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
+ se_idx, sh_idx, cu_idx);
+ gfx_v9_4_3_log_cu_timeout_status(
+ adev, status, xcc_id);
+ }
+ /* clear old status */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+}
+
+static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t i, j;
+ uint32_t value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
+ value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
+ value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t se_idx, sh_idx, cu_idx;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
+ for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
+ for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
+ cu_idx, xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSQ_TIMEOUT_STATUS, 0);
+ }
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
+ gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
+ void *ras_error_status, int xcc_id)
+{
+ uint32_t i;
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+ amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
+
+ if (amdgpu_watchdog_timer.timeout_fatal_disable &&
+ (amdgpu_watchdog_timer.period < 1 ||
+ amdgpu_watchdog_timer.period > 0x23)) {
+ dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n");
+ amdgpu_watchdog_timer.period = 0x23;
+ }
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL,
+ amdgpu_watchdog_timer.period);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, 0xffffffff, 0xffffffff, xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_TIMEOUT_CONFIG, data);
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_gfx_ras_error_func(adev, ras_error_status,
+ gfx_v9_4_3_inst_query_ras_err_count);
+}
+
+static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
+}
+
+static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
+}
+
+static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
+}
+
+static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
+{
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
+}
+
+static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
+ .name = "gfx_v9_4_3",
+ .early_init = gfx_v9_4_3_early_init,
+ .late_init = gfx_v9_4_3_late_init,
+ .sw_init = gfx_v9_4_3_sw_init,
+ .sw_fini = gfx_v9_4_3_sw_fini,
+ .hw_init = gfx_v9_4_3_hw_init,
+ .hw_fini = gfx_v9_4_3_hw_fini,
+ .suspend = gfx_v9_4_3_suspend,
+ .resume = gfx_v9_4_3_resume,
+ .is_idle = gfx_v9_4_3_is_idle,
+ .wait_for_idle = gfx_v9_4_3_wait_for_idle,
+ .soft_reset = gfx_v9_4_3_soft_reset,
+ .set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
+ .set_powergating_state = gfx_v9_4_3_set_powergating_state,
+ .get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */
+ 7 + /* gfx_v9_4_3_emit_mem_sync */
+ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */
+ 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute,
+ .emit_fence = gfx_v9_4_3_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .test_ib = gfx_v9_4_3_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v9_4_3_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_4_3_emit_wave_limit,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v9_4_3_ring_get_rptr_compute,
+ .get_wptr = gfx_v9_4_3_ring_get_wptr_compute,
+ .set_wptr = gfx_v9_4_3_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v9_4_3_ring_emit_gds_switch */
+ 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_4_3_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
+ .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq,
+ .test_ring = gfx_v9_4_3_ring_test_ring,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v9_4_3_ring_emit_rreg,
+ .emit_wreg = gfx_v9_4_3_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq;
+
+ for (j = 0; j < adev->gfx.num_compute_rings; j++)
+ adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs
+ = &gfx_v9_4_3_ring_funcs_compute;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = {
+ .set = gfx_v9_4_3_set_eop_interrupt_state,
+ .process = gfx_v9_4_3_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_reg_fault_state,
+ .process = gfx_v9_4_3_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = {
+ .set = gfx_v9_4_3_set_priv_inst_fault_state,
+ .process = gfx_v9_4_3_priv_inst_irq,
+};
+
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
+}
+
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs;
+}
+
+
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asci gds info */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ /* 9.4.3 removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+ adev->gds.gds_size = 0;
+ break;
+ default:
+ adev->gds.gds_size = 0x10000;
+ break;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 3):
+ /* deprecated for 9.4.3, no usage at all */
+ adev->gds.gds_compute_max_wave_id = 0;
+ break;
+ default:
+ /* this really depends on the chip */
+ adev->gds.gds_compute_max_wave_id = 0x7ff;
+ break;
+ }
+
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap, int xcc_id)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+
+ return (~data) & mask;
+}
+
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, xcc_id, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned disable_masks[4 * 4];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ /*
+ * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
+ */
+ if (adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se > 16)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks,
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
+ gfx_v9_4_3_set_user_cu_inactive_bitmap(
+ adev,
+ disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+ xcc_id);
+ bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
+
+ cu_info->bitmap[xcc_id][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+ xcc_id);
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 9,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &gfx_v9_4_3_ip_funcs,
+};
+
+static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask;
+ int i, r;
+
+ /* TODO : Initialize golden regs */
+ /* gfx_v9_4_3_init_golden_registers(adev); */
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask)
+ gfx_v9_4_3_xcc_constants_init(adev, i);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_rlc_resume(adev, i);
+ if (r)
+ return r;
+ }
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ r = gfx_v9_4_3_xcc_cp_resume(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for_each_inst(i, inst_mask)
+ gfx_v9_4_3_xcc_fini(adev, i);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
+ .suspend = &gfx_v9_4_3_xcp_suspend,
+ .resume = &gfx_v9_4_3_xcp_resume
+};
+
+struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
+ .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
+ .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
+ .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
+ .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
+};
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
+ .ras_block = {
+ .hw_ops = &gfx_v9_4_3_ras_ops,
+ },
+ .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
new file mode 100644
index 000000000..42d67ee0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_3_H__
+#define __GFX_V9_4_3_H__
+
+extern const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs;
+
+#endif /* __GFX_V9_4_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
new file mode 100644
index 000000000..cdc290a47
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "gfxhub_v1_0.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "gc/gc_9_0_default.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+static u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+}
+
+static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(
+ GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ }
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+}
+
+static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL2, tmp);
+
+ tmp = mmVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp);
+
+ tmp = mmVM_L2_CNTL4_DEFAULT;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp);
+}
+
+static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ adev->asic_type == CHIP_ALDEBARAN);
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ gfxhub_v1_0_init_gart_aperture_regs(adev);
+ gfxhub_v1_0_init_system_aperture_regs(adev);
+ gfxhub_v1_0_init_tlb_regs(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_init_cache_regs(adev);
+
+ gfxhub_v1_0_enable_system_domain(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_disable_identity_aperture(adev);
+ gfxhub_v1_0_setup_vmid_config(adev);
+ gfxhub_v1_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ if (amdgpu_sriov_vf(adev))
+ /* Avoid write to GMC registers */
+ return;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static void gfxhub_v1_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+}
+
+
+const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
+ .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v1_0_gart_enable,
+ .gart_disable = gfxhub_v1_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default,
+ .init = gfxhub_v1_0_init,
+ .get_xgmi_info = gfxhub_v1_1_get_xgmi_info,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
new file mode 100644
index 000000000..3174bc576
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_0_H__
+#define __GFXHUB_V1_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
new file mode 100644
index 000000000..90f0aefbd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_2_1_offset.h"
+#include "gc/gc_9_2_1_sh_mask.h"
+
+#include "soc15_common.h"
+
+#define mmMC_VM_XGMI_LFB_CNTL_ALDE 0x0978
+#define mmMC_VM_XGMI_LFB_CNTL_ALDE_BASE_IDX 0
+#define mmMC_VM_XGMI_LFB_SIZE_ALDE 0x0979
+#define mmMC_VM_XGMI_LFB_SIZE_ALDE_BASE_IDX 0
+//MC_VM_XGMI_LFB_CNTL
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_LFB_REGION__SHIFT 0x0
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_MAX_REGION__SHIFT 0x4
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_LFB_REGION_MASK 0x0000000FL
+#define MC_VM_XGMI_LFB_CNTL_ALDE__PF_MAX_REGION_MASK 0x000000F0L
+//MC_VM_XGMI_LFB_SIZE
+#define MC_VM_XGMI_LFB_SIZE_ALDE__PF_LFB_SIZE__SHIFT 0x0
+#define MC_VM_XGMI_LFB_SIZE_ALDE__PF_LFB_SIZE_MASK 0x0001FFFFL
+
+int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL_ALDE);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE_ALDE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE, PF_MAX_REGION);
+ } else {
+ xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ }
+
+
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ case CHIP_ARCTURUS:
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+ break;
+ case CHIP_ALDEBARAN:
+ max_num_physical_nodes = 16;
+ max_physical_node_id = 15;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE,
+ PF_LFB_REGION);
+ } else {
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+ }
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h
new file mode 100644
index 000000000..d753cf28a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_1_H__
+#define __GFXHUB_V1_1_H__
+
+int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
new file mode 100644
index 000000000..0834af771
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "gfxhub_v1_2.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v1_2_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t pt_base;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ for_each_inst(i, xcc_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ } else {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t value;
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void gfxhub_v1_2_xcc_enable_system_domain(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void
+gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On 9.4.2 and 9.4.3, XNACK can be enabled in
+ * the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ /* GART Enable. */
+ gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask);
+ gfxhub_v1_2_xcc_init_tlb_regs(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_init_cache_regs(adev, xcc_mask);
+
+ gfxhub_v1_2_xcc_enable_system_domain(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_disable_identity_aperture(adev, xcc_mask);
+ gfxhub_v1_2_xcc_setup_vmid_config(adev, xcc_mask);
+ gfxhub_v1_2_xcc_program_invalidation(adev, xcc_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ return gfxhub_v1_2_xcc_gart_enable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
+}
+
+static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_gart_disable(adev, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value,
+ uint32_t xcc_mask)
+{
+ u32 tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+/**
+ * gfxhub_v1_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, xcc_mask);
+}
+
+static void gfxhub_v1_2_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL -
+ regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance =
+ regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static void gfxhub_v1_2_init(struct amdgpu_device *adev)
+{
+ uint32_t xcc_mask;
+
+ xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+ gfxhub_v1_2_xcc_init(adev, xcc_mask);
+}
+
+static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 max_num_physical_nodes;
+ u32 max_physical_node_id;
+ u32 xgmi_lfb_cntl;
+ u32 max_region;
+ u64 seg_size;
+
+ xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL);
+ seg_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+
+
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+ PF_LFB_REGION);
+
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = seg_size;
+ }
+
+ return 0;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
+ .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v1_2_gart_enable,
+ .gart_disable = gfxhub_v1_2_gart_disable,
+ .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
+ .init = gfxhub_v1_2_init,
+ .get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
+};
+
+static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, inst_mask);
+
+ if (!amdgpu_sriov_vf(adev))
+ return gfxhub_v1_2_xcc_gart_enable(adev, inst_mask);
+
+ return 0;
+}
+
+static int gfxhub_v1_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_2_xcc_gart_disable(adev, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs = {
+ .suspend = &gfxhub_v1_2_xcp_suspend,
+ .resume = &gfxhub_v1_2_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
new file mode 100644
index 000000000..997e9f90c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_2_H__
+#define __GFXHUB_V1_2_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs;
+
+extern struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
new file mode 100644
index 000000000..a041c6c97
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v2_0.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "gc/gc_10_1_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v2_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+ }
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These regs are not accessible for VF, PF will program these in SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
+
+ tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
+
+ tmp = mmGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
+
+ tmp = mmGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ gfxhub_v2_0_init_gart_aperture_regs(adev);
+ gfxhub_v2_0_init_system_aperture_regs(adev);
+ gfxhub_v2_0_init_tlb_regs(adev);
+ gfxhub_v2_0_init_cache_regs(adev);
+
+ gfxhub_v2_0_enable_system_domain(adev);
+ gfxhub_v2_0_disable_identity_aperture(adev);
+ gfxhub_v2_0_setup_vmid_config(adev);
+ gfxhub_v2_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
+ }
+}
+
+/**
+ * gfxhub_v2_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v2_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v2_0_get_invalidate_req,
+};
+
+static void gfxhub_v2_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ -
+ mmGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v2_0_funcs = {
+ .get_fb_location = gfxhub_v2_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v2_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v2_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v2_0_gart_enable,
+ .gart_disable = gfxhub_v2_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v2_0_set_fault_enable_default,
+ .init = gfxhub_v2_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
new file mode 100644
index 000000000..9ddc35cd5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V2_0_H__
+#define __GFXHUB_V2_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v2_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
new file mode 100644
index 000000000..7708d5ded
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v2_1.h"
+
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "gc/gc_10_3_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP 0x16f8
+#define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP_BASE_IDX 0
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v2_1_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v2_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v2_1_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v2_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
+
+ tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
+
+ tmp = mmGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
+
+ tmp = mmGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v2_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v2_1_init_gart_aperture_regs(adev);
+ gfxhub_v2_1_init_system_aperture_regs(adev);
+ gfxhub_v2_1_init_tlb_regs(adev);
+ gfxhub_v2_1_init_cache_regs(adev);
+
+ gfxhub_v2_1_enable_system_domain(adev);
+ gfxhub_v2_1_disable_identity_aperture(adev);
+ gfxhub_v2_1_setup_vmid_config(adev);
+ gfxhub_v2_1_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v2_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v2_1_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v2_1_get_invalidate_req,
+};
+
+static void gfxhub_v2_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ -
+ mmGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs;
+}
+
+static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
+ u32 max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ u32 max_num_physical_nodes = 0;
+ u32 max_physical_node_id = 0;
+
+ switch (adev->ip_versions[XGMI_HWIP][0]) {
+ case IP_VERSION(4, 8, 0):
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
+ GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ }
+
+ return 0;
+}
+
+static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
+{
+ int i;
+ u32 tmp = 0, disabled_sa = 0;
+ u32 efuse_setting, vbios_setting;
+
+ u32 max_sa_mask = amdgpu_gfx_create_bitmask(
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ /* Get SA disabled bitmap from eFuse setting */
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
+ efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ /* Get SA disabled bitmap from VBIOS setting */
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE);
+ vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ disabled_sa |= efuse_setting | vbios_setting;
+ /* Make sure not to report harvested SAs beyond the max SA count */
+ disabled_sa &= max_sa_mask;
+
+ for (i = 0; disabled_sa > 0; i++) {
+ if (disabled_sa & 1)
+ tmp |= 0x3 << (i * 2);
+ disabled_sa >>= 1;
+ }
+ disabled_sa = tmp;
+
+ WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3);
+ adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32);
+ adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32);
+ adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG);
+ adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID);
+ adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2);
+ adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL);
+ adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2);
+ adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2);
+ }
+
+ adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev)
+{
+ int i;
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32);
+ WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL);
+
+ for (i = 0; i <= 15; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]);
+ }
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL);
+}
+
+static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+ int time = 1000;
+
+ gfxhub_v2_1_set_fault_enable_default(adev, false);
+
+ for (i = 0; i <= 14; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, ~0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ 0);
+ }
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK |
+ GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 &&
+ time) {
+ udelay(100);
+ time--;
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ }
+
+ if (!time)
+ DRM_WARN("failed to wait for GRBM(EA) idle\n");
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
+ .get_fb_location = gfxhub_v2_1_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v2_1_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v2_1_gart_enable,
+ .gart_disable = gfxhub_v2_1_gart_disable,
+ .set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default,
+ .init = gfxhub_v2_1_init,
+ .get_xgmi_info = gfxhub_v2_1_get_xgmi_info,
+ .utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
+ .mode2_save_regs = gfxhub_v2_1_save_regs,
+ .mode2_restore_regs = gfxhub_v2_1_restore_regs,
+ .halt = gfxhub_v2_1_halt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h
new file mode 100644
index 000000000..f75c2eccf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V2_1_H__
+#define __GFXHUB_V2_1_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
new file mode 100644
index 000000000..e1c76c070
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -0,0 +1,514 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v3_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v3_0_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_init_system_aperture_regs(adev);
+ gfxhub_v3_0_init_tlb_regs(adev);
+ gfxhub_v3_0_init_cache_regs(adev);
+
+ gfxhub_v3_0_enable_system_domain(adev);
+ gfxhub_v3_0_disable_identity_aperture(adev);
+ gfxhub_v3_0_setup_vmid_config(adev);
+ gfxhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_get_invalidate_req,
+};
+
+static void gfxhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs = {
+ .get_fb_location = gfxhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_gart_enable,
+ .gart_disable = gfxhub_v3_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_set_fault_enable_default,
+ .init = gfxhub_v3_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
new file mode 100644
index 000000000..ea345e4e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_H__
+#define __GFXHUB_V3_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
new file mode 100644
index 000000000..07f369c7a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v3_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char * const gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v3_0_3_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v3_0_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v3_0_3_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v3_0_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v3_0_3_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Disable AGP. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v3_0_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v3_0_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned int i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ gfxhub_v3_0_3_init_gart_aperture_regs(adev);
+ gfxhub_v3_0_3_init_system_aperture_regs(adev);
+ gfxhub_v3_0_3_init_tlb_regs(adev);
+ gfxhub_v3_0_3_init_cache_regs(adev);
+
+ gfxhub_v3_0_3_enable_system_domain(adev);
+ gfxhub_v3_0_3_disable_identity_aperture(adev);
+ gfxhub_v3_0_3_setup_vmid_config(adev);
+ gfxhub_v3_0_3_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v3_0_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v3_0_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v3_0_3_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v3_0_3_get_invalidate_req,
+};
+
+static void gfxhub_v3_0_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v3_0_3_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs = {
+ .get_fb_location = gfxhub_v3_0_3_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v3_0_3_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v3_0_3_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v3_0_3_gart_enable,
+ .gart_disable = gfxhub_v3_0_3_gart_disable,
+ .set_fault_enable_default = gfxhub_v3_0_3_set_fault_enable_default,
+ .init = gfxhub_v3_0_3_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
new file mode 100644
index 000000000..6153bd5e3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V3_0_3_H__
+#define __GFXHUB_V3_0_3_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v3_0_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
new file mode 100644
index 000000000..fa87a85e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -0,0 +1,1259 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v10_0.h"
+#include "umc_v8_7.h"
+
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
+#include "dcn/dcn_2_0_0_offset.h"
+#include "dcn/dcn_2_0_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "navi10_enum.h"
+
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "nbio_v2_3.h"
+
+#include "gfxhub_v2_0.h"
+#include "gfxhub_v2_1.h"
+#include "mmhub_v2_0.h"
+#include "mmhub_v2_3.h"
+#include "athub_v2_0.h"
+#include "athub_v2_1.h"
+
+#include "amdgpu_reset.h"
+
+static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int
+gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ bool retry_fault = !!(entry->src_data[1] & 0x80);
+ bool write_fault = !!(entry->src_data[1] & 0x20);
+ struct amdgpu_task_info task_info;
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (retry_fault) {
+ /* Returning 1 here also prevents sending the IV to the KFD */
+
+ /* Process it onyl if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault))
+ return 1;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ }
+
+ if (!printk_ratelimit())
+ return 0;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n",
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
+
+ if (!amdgpu_sriov_vf(adev))
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev,
+ status);
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
+ .set = gmc_v10_0_vm_fault_interrupt_state,
+ .process = gmc_v10_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v10_0_ecc_funcs = {
+ .set = gmc_v10_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v10_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned int eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, hub_ip);
+
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ inv_req, hub_ip);
+
+ /*
+ * Issue a dummy read to wait for the ACK register to be cleared
+ * to avoid a false ACK due to the new fast GRBM interface.
+ */
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+ RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
+ hub->eng_distance * eng, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
+}
+
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+
+ int r;
+
+ /* flush hdp cache */
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ /* For SRIOV run time, driver shouldn't access the register through MMIO
+ * Directly use kiq to do the vm invalidation instead
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned int eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+
+ if (vmhub == AMDGPU_MMHUB0(0)) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+ }
+
+ BUG_ON(vmhub != AMDGPU_GFXHUB(0));
+
+ if (!adev->mman.buffer_funcs_enabled ||
+ !adev->ib_pool_ready ||
+ amdgpu_in_reset(adev) ||
+ ring->sched.ready == false) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+ }
+
+ /* The SDMA on Navi has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as an VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself.
+ */
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job);
+ if (r)
+ goto error_alloc;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ fence = amdgpu_job_submit(job);
+
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ return -ETIME;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+
+ ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0), flush_type);
+ }
+ if (!adev->enable_mes)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned int eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ /* MES fw manages IH_VMID_x_LUT updating */
+ if (ring->is_mes_queue)
+ return;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format on NAVI 10:
+ * 63:59 reserved
+ * 58 reserved and for sienna_cichlid is used for MALL noalloc
+ * 57 reserved
+ * 56 F
+ * 55 L
+ * 54 reserved
+ * 53:52 SW
+ * 51 T
+ * 50:48 mtype
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on NAVI 10:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+{
+ switch (flags) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_NC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_WC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
+ case AMDGPU_VM_MTYPE_CC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
+ case AMDGPU_VM_MTYPE_UC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+ default:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ }
+}
+
+static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE)
+ *flags &= ~AMDGPU_PDE_PTE;
+ else
+ *flags |= AMDGPU_PTE_TF;
+ }
+}
+
+static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+ *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+ *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+ AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+}
+
+static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport;
+ u32 pitch;
+
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4);
+ }
+
+ return size;
+}
+
+static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
+ .map_mtype = gmc_v10_0_map_mtype,
+ .get_vm_pde = gmc_v10_0_get_vm_pde,
+ .get_vm_pte = gmc_v10_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
+};
+
+static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
+}
+
+static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(8, 7, 0):
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+ adev->umc.retire_unit = 1;
+ adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
+ adev->umc.ras = &umc_v8_7_ras;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
+ adev->mmhub.funcs = &mmhub_v2_3_funcs;
+ break;
+ default:
+ adev->mmhub.funcs = &mmhub_v2_0_funcs;
+ break;
+ }
+}
+
+static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfxhub.funcs = &gfxhub_v2_1_funcs;
+ break;
+ default:
+ adev->gfxhub.funcs = &gfxhub_v2_0_funcs;
+ break;
+ }
+}
+
+
+static int gmc_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v10_0_set_mmhub_funcs(adev);
+ gmc_v10_0_set_gfxhub_funcs(adev);
+ gmc_v10_0_set_gmc_funcs(adev);
+ gmc_v10_0_set_irq_funcs(adev);
+ gmc_v10_0_set_umc_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v10_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ base = adev->gfxhub.funcs->get_fb_location(adev);
+
+ /* add the xgmi offset of the physical node */
+ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages */
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+
+ /* add the xgmi offset of the physical node */
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+}
+
+/**
+ * gmc_v10_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* size in MB on si */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ default:
+ adev->gmc.gart_size = 512ULL << 20;
+ break;
+ case IP_VERSION(10, 3, 1): /* DCE SG support */
+ case IP_VERSION(10, 3, 3): /* DCE SG support */
+ case IP_VERSION(10, 3, 6): /* DCE SG support */
+ case IP_VERSION(10, 3, 7): /* DCE SG support */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
+ gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "NAVI10 PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static int gmc_v10_0_sw_init(void *handle)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfxhub.funcs->init(adev);
+
+ adev->mmhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ if ((adev->flags & AMD_IS_APU) && amdgpu_emu_mode == 1) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64;
+ } else if (amdgpu_emu_mode == 1) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
+ adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ adev->gmc.mall_size = 128 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 2):
+ adev->gmc.mall_size = 96 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 4):
+ adev->gmc.mall_size = 32 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 5):
+ adev->gmc.mall_size = 16 * 1024 * 1024;
+ break;
+ default:
+ adev->gmc.mall_size = 0;
+ break;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v10_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v10_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v10_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+static int gmc_v10_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v10_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+}
+
+/**
+ * gmc_v10_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ adev->hdp.funcs->init_registers(adev);
+
+ /* Flush HDP after it is initialized */
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
+ false : true;
+
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+ if (!adev->in_s0ix)
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v10_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v10_0_init_golden_registers(adev);
+
+ /*
+ * harvestable groups in gc_utcl2 need to be programmed before any GFX block
+ * register setup within GMC, or else system hang when harvesting SA.
+ */
+ if (!adev->in_s0ix && adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest)
+ adev->gfxhub.funcs->utcl2_harvest(adev);
+
+ r = gmc_v10_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_emu_mode == 1) {
+ r = amdgpu_gmc_vram_checking(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v10_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
+{
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v10_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v10_0_gart_disable(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ return 0;
+}
+
+static int gmc_v10_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v10_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v10_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v10_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v10_0_is_idle(void *handle)
+{
+ /* MC is always ready in GMC v10.*/
+ return true;
+}
+
+static int gmc_v10_0_wait_for_idle(void *handle)
+{
+ /* There is no need to wait for MC idle in GMC v10.*/
+ return 0;
+}
+
+static int gmc_v10_0_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int gmc_v10_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+ * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+ * seen any issue on the DF 3.0.2 series platform.
+ */
+ if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+ dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+ return 0;
+ }
+
+ r = adev->mmhub.funcs->set_clockgating(adev, state);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ return athub_v2_1_set_clockgating(adev, state);
+ else
+ return athub_v2_0_set_clockgating(adev, state);
+}
+
+static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+ return;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ athub_v2_1_get_clockgating(adev, flags);
+ else
+ athub_v2_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
+ .name = "gmc_v10_0",
+ .early_init = gmc_v10_0_early_init,
+ .late_init = gmc_v10_0_late_init,
+ .sw_init = gmc_v10_0_sw_init,
+ .sw_fini = gmc_v10_0_sw_fini,
+ .hw_init = gmc_v10_0_hw_init,
+ .hw_fini = gmc_v10_0_hw_fini,
+ .suspend = gmc_v10_0_suspend,
+ .resume = gmc_v10_0_resume,
+ .is_idle = gmc_v10_0_is_idle,
+ .wait_for_idle = gmc_v10_0_wait_for_idle,
+ .soft_reset = gmc_v10_0_soft_reset,
+ .set_clockgating_state = gmc_v10_0_set_clockgating_state,
+ .set_powergating_state = gmc_v10_0_set_powergating_state,
+ .get_clockgating_state = gmc_v10_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h
new file mode 100644
index 000000000..7daa53d89
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V10_0_H__
+#define __GMC_V10_0_H__
+
+extern const struct amd_ip_funcs gmc_v10_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v10_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
new file mode 100644
index 000000000..e3b76fd28
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -0,0 +1,1074 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v11_0.h"
+#include "umc_v8_10.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "dcn/dcn_3_2_0_offset.h"
+#include "dcn/dcn_3_2_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "navi10_enum.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbio_v4_3.h"
+#include "gfxhub_v3_0.h"
+#include "gfxhub_v3_0_3.h"
+#include "mmhub_v3_0.h"
+#include "mmhub_v3_0_1.h"
+#include "mmhub_v3_0_2.h"
+#include "athub_v3_0.h"
+
+
+static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int
+gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ?
+ AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+ if (!amdgpu_sriov_vf(adev))
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_irq_funcs = {
+ .set = gmc_v11_0_vm_fault_interrupt_state,
+ .process = gmc_v11_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v11_0_ecc_funcs = {
+ .set = gmc_v11_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v11_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v11_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v11_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v11_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
+
+ return !!(*p_pasid);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned int eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ /* flush hdp cache */
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ /* For SRIOV run time, driver shouldn't access the register through MMIO
+ * Directly use kiq to do the vm invalidation instead
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned int eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+ return;
+ }
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+}
+
+/**
+ * gmc_v11_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ return -ETIME;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ gmc_v11_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v11_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0), flush_type);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned int eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ /* MES fw manages IH_VMID_x_LUT updating */
+ if (ring->is_mes_queue)
+ return;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format:
+ * 63:59 reserved
+ * 58:57 reserved
+ * 56 F
+ * 55 L
+ * 54 reserved
+ * 53:52 SW
+ * 51 T
+ * 50:48 mtype
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+{
+ switch (flags) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_NC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_WC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
+ case AMDGPU_VM_MTYPE_CC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
+ case AMDGPU_VM_MTYPE_UC:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+ default:
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ }
+}
+
+static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = adev->vm_manager.vram_base_offset + *addr -
+ adev->gmc.vram_start;
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE)
+ *flags &= ~AMDGPU_PDE_PTE;
+ else
+ *flags |= AMDGPU_PTE_TF;
+ }
+}
+
+static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+ *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+ *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED))
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+ AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+}
+
+static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, regD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport;
+ u32 pitch;
+
+ viewport = RREG32_SOC15(DCE, 0, regHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, regHUBPREQ0_DCSURF_SURFACE_PITCH);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4);
+ }
+
+ return size;
+}
+
+static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v11_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
+ .map_mtype = gmc_v11_0_map_mtype,
+ .get_vm_pde = gmc_v11_0_get_vm_pde,
+ .get_vm_pte = gmc_v11_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
+};
+
+static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v11_0_gmc_funcs;
+}
+
+static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(8, 10, 0):
+ adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
+ if (adev->umc.node_inst_num == 4)
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0];
+ else
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
+ adev->umc.ras = &umc_v8_10_ras;
+ break;
+ case IP_VERSION(8, 11, 0):
+ break;
+ default:
+ break;
+ }
+}
+
+
+static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
+ break;
+ case IP_VERSION(3, 0, 2):
+ adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
+ break;
+ default:
+ adev->mmhub.funcs = &mmhub_v3_0_funcs;
+ break;
+ }
+}
+
+static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 3):
+ adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs;
+ break;
+ default:
+ adev->gfxhub.funcs = &gfxhub_v3_0_funcs;
+ break;
+ }
+}
+
+static int gmc_v11_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v11_0_set_gfxhub_funcs(adev);
+ gmc_v11_0_set_mmhub_funcs(adev);
+ gmc_v11_0_set_gmc_funcs(adev);
+ gmc_v11_0_set_irq_funcs(adev);
+ gmc_v11_0_set_umc_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v11_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ base = adev->mmhub.funcs->get_fb_location(adev);
+
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages */
+ if (amdgpu_sriov_vf(adev))
+ adev->vm_manager.vram_base_offset = 0;
+ else
+ adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v11_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* size in MB on si */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1)
+ adev->gmc.gart_size = 512ULL << 20;
+ else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v11_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static int gmc_v11_0_sw_init(void *handle)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mmhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+ if (r) {
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
+ r = gmc_v11_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v11_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+static int gmc_v11_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v11_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32(hub->vm_contexts_disable, 0);
+ return;
+ }
+}
+
+/**
+ * gmc_v11_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
+ false : true;
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v11_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v11_0_init_golden_registers(adev);
+
+ r = gmc_v11_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v11_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v11_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ gmc_v11_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v11_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v11_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v11_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v11_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v11_0_is_idle(void *handle)
+{
+ /* MC is always ready in GMC v11.*/
+ return true;
+}
+
+static int gmc_v11_0_wait_for_idle(void *handle)
+{
+ /* There is no need to wait for MC idle in GMC v11.*/
+ return 0;
+}
+
+static int gmc_v11_0_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int gmc_v11_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = adev->mmhub.funcs->set_clockgating(adev, state);
+ if (r)
+ return r;
+
+ return athub_v3_0_set_clockgating(adev, state);
+}
+
+static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v3_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v11_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
+ .name = "gmc_v11_0",
+ .early_init = gmc_v11_0_early_init,
+ .sw_init = gmc_v11_0_sw_init,
+ .hw_init = gmc_v11_0_hw_init,
+ .late_init = gmc_v11_0_late_init,
+ .sw_fini = gmc_v11_0_sw_fini,
+ .hw_fini = gmc_v11_0_hw_fini,
+ .suspend = gmc_v11_0_suspend,
+ .resume = gmc_v11_0_resume,
+ .is_idle = gmc_v11_0_is_idle,
+ .wait_for_idle = gmc_v11_0_wait_for_idle,
+ .soft_reset = gmc_v11_0_soft_reset,
+ .set_clockgating_state = gmc_v11_0_set_clockgating_state,
+ .set_powergating_state = gmc_v11_0_set_powergating_state,
+ .get_clockgating_state = gmc_v11_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
new file mode 100644
index 000000000..def4d5516
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V11_0_H__
+#define __GMC_V11_0_H__
+
+extern const struct amd_ip_funcs gmc_v11_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
new file mode 100644
index 000000000..5b837a65f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -0,0 +1,1149 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+#include "amdgpu.h"
+#include "gmc_v6_0.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_gem.h"
+
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+#include "si_enums.h"
+
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
+static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v6_0_wait_for_idle(void *handle);
+
+MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
+MODULE_FIRMWARE("amdgpu/verde_mc.bin");
+MODULE_FIRMWARE("amdgpu/oland_mc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_mc.bin");
+MODULE_FIRMWARE("amdgpu/si58_mc.bin");
+
+#define MC_SEQ_MISC0__MT__MASK 0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
+#define MC_SEQ_MISC0__MT__DDR2 0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3 0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4 0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5 0x50000000
+#define MC_SEQ_MISC0__MT__HBM 0x60000000
+#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
+
+static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
+{
+ u32 blackout;
+
+ gmc_v6_0_wait_for_idle((void *)adev);
+
+ blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
+ /* Block CPU access */
+ WREG32(mmBIF_FB_EN, 0);
+ /* blackout the MC */
+ blackout = REG_SET_FIELD(blackout,
+ MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, blackout | 1);
+ }
+ /* wait for the MC to settle */
+ udelay(100);
+
+}
+
+static void gmc_v6_0_mc_resume(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* unblackout the MC */
+ tmp = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, tmp);
+ /* allow CPU access */
+ tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
+ tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
+ WREG32(mmBIF_FB_EN, tmp);
+}
+
+static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ bool is_58_fw = false;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_name = "tahiti";
+ break;
+ case CHIP_PITCAIRN:
+ chip_name = "pitcairn";
+ break;
+ case CHIP_VERDE:
+ chip_name = "verde";
+ break;
+ case CHIP_OLAND:
+ chip_name = "oland";
+ break;
+ case CHIP_HAINAN:
+ chip_name = "hainan";
+ break;
+ default:
+ BUG();
+ }
+
+ /* this memory configuration requires special firmware */
+ if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
+ is_58_fw = true;
+
+ if (is_58_fw)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ if (err) {
+ dev_err(adev->dev,
+ "si_mc: Failed to load firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
+ }
+ return err;
+}
+
+static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
+{
+ const __le32 *new_fw_data = NULL;
+ u32 running;
+ const __le32 *new_io_mc_regs = NULL;
+ int i, regs_size, ucode_size;
+ const struct mc_firmware_header_v1_0 *hdr;
+
+ if (!adev->gmc.fw)
+ return -EINVAL;
+
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
+
+ amdgpu_ucode_print_mc_hdr(&hdr->header);
+
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
+ new_io_mc_regs = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ new_fw_data = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK;
+
+ if (running == 0) {
+
+ /* reset the engine and set to writable */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);
+
+ /* load mc io regs */
+ for (i = 0; i < regs_size; i++) {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
+ WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
+ }
+ /* load the MC ucode */
+ for (i = 0; i < ucode_size; i++)
+ WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
+
+ /* put the engine back into the active state */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);
+
+ /* wait for training to complete */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL) & MC_SEQ_TRAIN_WAKEUP_CNTL__TRAIN_DONE_D0_MASK)
+ break;
+ udelay(1);
+ }
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL) & MC_SEQ_TRAIN_WAKEUP_CNTL__TRAIN_DONE_D1_MASK)
+ break;
+ udelay(1);
+ }
+
+ }
+
+ return 0;
+}
+
+static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
+ base <<= 24;
+
+ amdgpu_gmc_vram_location(adev, mc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+}
+
+static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x6) {
+ WREG32((0xb05 + j), 0x00000000);
+ WREG32((0xb06 + j), 0x00000000);
+ WREG32((0xb07 + j), 0x00000000);
+ WREG32((0xb08 + j), 0x00000000);
+ WREG32((0xb09 + j), 0x00000000);
+ }
+ WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+ if (gmc_v6_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+
+ if (adev->mode_info.num_crtc) {
+ u32 tmp;
+
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ tmp |= VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK;
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ tmp &= ~VGA_VSTATUS_CNTL;
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+ }
+ /* Update configuration */
+ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+ adev->mem_scratch.gpu_addr >> 12);
+ WREG32(mmMC_VM_AGP_BASE, 0);
+ WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+
+ if (gmc_v6_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+}
+
+static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
+{
+
+ u32 tmp;
+ int chansize, numchan;
+ int r;
+
+ tmp = RREG32(mmMC_ARB_RAMCFG);
+ if (tmp & (1 << 11))
+ chansize = 16;
+ else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK)
+ chansize = 64;
+ else
+ chansize = 32;
+
+ tmp = RREG32(mmMC_SHARED_CHMAP);
+ switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
+ case 0:
+ default:
+ numchan = 1;
+ break;
+ case 1:
+ numchan = 2;
+ break;
+ case 2:
+ numchan = 4;
+ break;
+ case 3:
+ numchan = 8;
+ break;
+ case 4:
+ numchan = 3;
+ break;
+ case 5:
+ numchan = 6;
+ break;
+ case 6:
+ numchan = 10;
+ break;
+ case 7:
+ numchan = 12;
+ break;
+ case 8:
+ numchan = 16;
+ break;
+ }
+ adev->gmc.vram_width = numchan * chansize;
+ /* size in MB on si */
+ adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_HAINAN: /* no MM engines */
+ default:
+ adev->gmc.gart_size = 256ULL << 20;
+ break;
+ case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
+ case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
+ case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
+ case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
+ gmc_v6_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ /* write new base address */
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8);
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
+}
+
+static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
+static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+}
+
+ /**
+ * gmc_v8_0_set_prt() - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
+static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && !adev->gmc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->gmc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES,
+ enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES,
+ enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
+static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
+{
+ uint64_t table_addr;
+ u32 field;
+ int i;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
+ /* Setup TLB control */
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL,
+ (0xA << 7) |
+ MC_VM_MX_L1_TLB_CNTL__ENABLE_L1_TLB_MASK |
+ MC_VM_MX_L1_TLB_CNTL__ENABLE_L1_FRAGMENT_PROCESSING_MASK |
+ MC_VM_MX_L1_TLB_CNTL__SYSTEM_ACCESS_MODE_MASK |
+ MC_VM_MX_L1_TLB_CNTL__ENABLE_ADVANCED_DRIVER_MODEL_MASK |
+ (0UL << MC_VM_MX_L1_TLB_CNTL__SYSTEM_APERTURE_UNMAPPED_ACCESS__SHIFT));
+ /* Setup L2 cache */
+ WREG32(mmVM_L2_CNTL,
+ VM_L2_CNTL__ENABLE_L2_CACHE_MASK |
+ VM_L2_CNTL__ENABLE_L2_FRAGMENT_PROCESSING_MASK |
+ VM_L2_CNTL__ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+ VM_L2_CNTL__ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+ (7UL << VM_L2_CNTL__EFFECTIVE_L2_QUEUE_SIZE__SHIFT) |
+ (1UL << VM_L2_CNTL__CONTEXT1_IDENTITY_ACCESS_MODE__SHIFT));
+ WREG32(mmVM_L2_CNTL2,
+ VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK |
+ VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK);
+
+ field = adev->vm_manager.fragment_size;
+ WREG32(mmVM_L2_CNTL3,
+ VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
+ (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
+ (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
+ /* setup context0 */
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT0_CNTL2, 0);
+ WREG32(mmVM_CONTEXT0_CNTL,
+ VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK |
+ (0UL << VM_CONTEXT0_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
+ VM_CONTEXT0_CNTL__RANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK);
+
+ WREG32(0x575, 0);
+ WREG32(0x576, 0);
+ WREG32(0x577, 0);
+
+ /* empty context1-15 */
+ /* set vm size, must be a multiple of 4 */
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1);
+ /* Assign the pt base to something valid for now; the pts used for
+ * the VMs are determined by the application and setup and assigned
+ * on the fly in the vm part of radeon_gart.c
+ */
+ for (i = 1; i < AMDGPU_NUM_VMID; i++) {
+ if (i < 8)
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
+ table_addr >> 12);
+ else
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
+ table_addr >> 12);
+ }
+
+ /* enable context1-15 */
+ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT1_CNTL2, 4);
+ WREG32(mmVM_CONTEXT1_CNTL,
+ VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
+ (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
+ ((adev->vm_manager.block_size - 9)
+ << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ gmc_v6_0_set_fault_enable_default(adev, false);
+ else
+ gmc_v6_0_set_fault_enable_default(adev, true);
+
+ gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
+ dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)table_addr);
+ return 0;
+}
+
+static int gmc_v6_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ dev_warn(adev->dev, "gmc_v6_0 PCIE GART already initialized\n");
+ return 0;
+ }
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = 0;
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
+{
+ /*unsigned i;
+
+ for (i = 1; i < 16; ++i) {
+ uint32_t reg;
+ if (i < 8)
+ reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i ;
+ else
+ reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (i - 8);
+ adev->vm_manager.saved_table_addr[i] = RREG32(reg);
+ }*/
+
+ /* Disable all tables */
+ WREG32(mmVM_CONTEXT0_CNTL, 0);
+ WREG32(mmVM_CONTEXT1_CNTL, 0);
+ /* Setup TLB control */
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL,
+ MC_VM_MX_L1_TLB_CNTL__SYSTEM_ACCESS_MODE_MASK |
+ (0UL << MC_VM_MX_L1_TLB_CNTL__SYSTEM_APERTURE_UNMAPPED_ACCESS__SHIFT));
+ /* Setup L2 cache */
+ WREG32(mmVM_L2_CNTL,
+ VM_L2_CNTL__ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+ VM_L2_CNTL__ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+ (7UL << VM_L2_CNTL__EFFECTIVE_L2_QUEUE_SIZE__SHIFT) |
+ (1UL << VM_L2_CNTL__CONTEXT1_IDENTITY_ACCESS_MODE__SHIFT));
+ WREG32(mmVM_L2_CNTL2, 0);
+ WREG32(mmVM_L2_CNTL3,
+ VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
+ (0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
+}
+
+static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
+ u32 status, u32 addr, u32 mc_client)
+{
+ u32 mc_id;
+ u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+ u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+ char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
+ (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+
+ mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ protections, vmid, addr,
+ REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_RW) ?
+ "write" : "read", block, mc_client, mc_id);
+}
+
+/*
+static const u32 mc_cg_registers[] = {
+ MC_HUB_MISC_HUB_CG,
+ MC_HUB_MISC_SIP_CG,
+ MC_HUB_MISC_VM_CG,
+ MC_XPB_CLK_GAT,
+ ATC_MISC_CG,
+ MC_CITF_MISC_WR_CG,
+ MC_CITF_MISC_RD_CG,
+ MC_CITF_MISC_VM_CG,
+ VM_L2_CG,
+};
+
+static const u32 mc_cg_ls_en[] = {
+ MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK,
+ MC_HUB_MISC_SIP_CG__MEM_LS_ENABLE_MASK,
+ MC_HUB_MISC_VM_CG__MEM_LS_ENABLE_MASK,
+ MC_XPB_CLK_GAT__MEM_LS_ENABLE_MASK,
+ ATC_MISC_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_WR_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_RD_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_VM_CG__MEM_LS_ENABLE_MASK,
+ VM_L2_CG__MEM_LS_ENABLE_MASK,
+};
+
+static const u32 mc_cg_en[] = {
+ MC_HUB_MISC_HUB_CG__ENABLE_MASK,
+ MC_HUB_MISC_SIP_CG__ENABLE_MASK,
+ MC_HUB_MISC_VM_CG__ENABLE_MASK,
+ MC_XPB_CLK_GAT__ENABLE_MASK,
+ ATC_MISC_CG__ENABLE_MASK,
+ MC_CITF_MISC_WR_CG__ENABLE_MASK,
+ MC_CITF_MISC_RD_CG__ENABLE_MASK,
+ MC_CITF_MISC_VM_CG__ENABLE_MASK,
+ VM_L2_CG__ENABLE_MASK,
+};
+
+static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ data |= mc_cg_ls_en[i];
+ else
+ data &= ~mc_cg_ls_en[i];
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ data |= mc_cg_en[i];
+ else
+ data &= ~mc_cg_en[i];
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+
+ if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_AGGRESSIVE_LS_EN, 1);
+ } else {
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_AGGRESSIVE_LS_EN, 0);
+ }
+
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+}
+
+static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
+
+ if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
+ else
+ data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
+
+ if (orig != data)
+ WREG32(mmHDP_HOST_PATH_CNTL, data);
+}
+
+static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(mmHDP_MEM_POWER_LS);
+
+ if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
+ else
+ data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
+
+ if (orig != data)
+ WREG32(mmHDP_MEM_POWER_LS, data);
+}
+*/
+
+static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
+{
+ switch (mc_seq_vram_type) {
+ case MC_SEQ_MISC0__MT__GDDR1:
+ return AMDGPU_VRAM_TYPE_GDDR1;
+ case MC_SEQ_MISC0__MT__DDR2:
+ return AMDGPU_VRAM_TYPE_DDR2;
+ case MC_SEQ_MISC0__MT__GDDR3:
+ return AMDGPU_VRAM_TYPE_GDDR3;
+ case MC_SEQ_MISC0__MT__GDDR4:
+ return AMDGPU_VRAM_TYPE_GDDR4;
+ case MC_SEQ_MISC0__MT__GDDR5:
+ return AMDGPU_VRAM_TYPE_GDDR5;
+ case MC_SEQ_MISC0__MT__DDR3:
+ return AMDGPU_VRAM_TYPE_DDR3;
+ default:
+ return AMDGPU_VRAM_TYPE_UNKNOWN;
+ }
+}
+
+static int gmc_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v6_0_set_gmc_funcs(adev);
+ gmc_v6_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int gmc_v6_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+ else
+ return 0;
+}
+
+static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ return size;
+}
+
+static int gmc_v6_0_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ } else {
+ u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
+ tmp &= MC_SEQ_MISC0__MT__MASK;
+ adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
+ }
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
+
+ adev->gmc.mc_mask = 0xffffffffffULL;
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
+ if (r) {
+ dev_warn(adev->dev, "No suitable DMA available.\n");
+ return r;
+ }
+ adev->need_swiotlb = drm_need_swiotlb(40);
+
+ r = gmc_v6_0_init_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to load mc firmware!\n");
+ return r;
+ }
+
+ r = gmc_v6_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v6_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = 8;
+ amdgpu_vm_manager_init(adev);
+
+ /* base offset of vram pages */
+ if (adev->flags & AMD_IS_APU) {
+ u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+ tmp <<= 22;
+ adev->vm_manager.vram_base_offset = tmp;
+ } else {
+ adev->vm_manager.vram_base_offset = 0;
+ }
+
+ return 0;
+}
+
+static int gmc_v6_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_gem_force_release(adev);
+ amdgpu_vm_manager_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
+ amdgpu_bo_fini(adev);
+ amdgpu_ucode_release(&adev->gmc.fw);
+
+ return 0;
+}
+
+static int gmc_v6_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v6_0_mc_program(adev);
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = gmc_v6_0_mc_load_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to load MC firmware!\n");
+ return r;
+ }
+ }
+
+ r = gmc_v6_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+ else
+ return r;
+}
+
+static int gmc_v6_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ gmc_v6_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v6_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v6_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v6_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v6_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v6_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK | SRBM_STATUS__VMC_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int gmc_v6_0_wait_for_idle(void *handle)
+{
+ unsigned int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gmc_v6_0_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+
+}
+
+static int gmc_v6_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) {
+ if (!(adev->flags & AMD_IS_APU))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
+ }
+
+ if (srbm_soft_reset) {
+ gmc_v6_0_mc_stop(adev);
+ if (gmc_v6_0_wait_for_idle(adev))
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ gmc_v6_0_mc_resume(adev);
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int gmc_v6_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+ u32 bits = (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 addr, status;
+
+ addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
+ status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+ WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
+
+ if (!addr && !status)
+ return 0;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
+ gmc_v6_0_set_fault_enable_default(adev, false);
+
+ if (printk_ratelimit()) {
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ addr);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ status);
+ gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+ }
+
+ return 0;
+}
+
+static int gmc_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int gmc_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
+ .name = "gmc_v6_0",
+ .early_init = gmc_v6_0_early_init,
+ .late_init = gmc_v6_0_late_init,
+ .sw_init = gmc_v6_0_sw_init,
+ .sw_fini = gmc_v6_0_sw_fini,
+ .hw_init = gmc_v6_0_hw_init,
+ .hw_fini = gmc_v6_0_hw_fini,
+ .suspend = gmc_v6_0_suspend,
+ .resume = gmc_v6_0_resume,
+ .is_idle = gmc_v6_0_is_idle,
+ .wait_for_idle = gmc_v6_0_wait_for_idle,
+ .soft_reset = gmc_v6_0_soft_reset,
+ .set_clockgating_state = gmc_v6_0_set_clockgating_state,
+ .set_powergating_state = gmc_v6_0_set_powergating_state,
+};
+
+static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
+ .set_prt = gmc_v6_0_set_prt,
+ .get_vm_pde = gmc_v6_0_get_vm_pde,
+ .get_vm_pte = gmc_v6_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v6_0_get_vbios_fb_size,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
+ .set = gmc_v6_0_vm_fault_interrupt_state,
+ .process = gmc_v6_0_process_interrupt,
+};
+
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
+}
+
+static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version gmc_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h
new file mode 100644
index 000000000..ed2f64dec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V6_0_H__
+#define __GMC_V6_0_H__
+
+extern const struct amdgpu_ip_block_version gmc_v6_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
new file mode 100644
index 000000000..6a6929ac2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -0,0 +1,1400 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+#include "amdgpu.h"
+#include "cikd.h"
+#include "cik.h"
+#include "gmc_v7_0.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "amdgpu_atombios.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
+static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v7_0_wait_for_idle(void *handle);
+
+MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
+
+static const u32 golden_settings_iceland_a11[] = {
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 iceland_mgcg_cgcg_init[] = {
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ amdgpu_device_program_register_sequence(adev,
+ iceland_mgcg_cgcg_init,
+ ARRAY_SIZE(iceland_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_iceland_a11,
+ ARRAY_SIZE(golden_settings_iceland_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
+{
+ u32 blackout;
+
+ gmc_v7_0_wait_for_idle((void *)adev);
+
+ blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
+ /* Block CPU access */
+ WREG32(mmBIF_FB_EN, 0);
+ /* blackout the MC */
+ blackout = REG_SET_FIELD(blackout,
+ MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, blackout | 1);
+ }
+ /* wait for the MC to settle */
+ udelay(100);
+}
+
+static void gmc_v7_0_mc_resume(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* unblackout the MC */
+ tmp = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, tmp);
+ /* allow CPU access */
+ tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
+ tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
+ WREG32(mmBIF_FB_EN, tmp);
+}
+
+/**
+ * gmc_v7_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ chip_name = "bonaire";
+ break;
+ case CHIP_HAWAII:
+ chip_name = "hawaii";
+ break;
+ case CHIP_TOPAZ:
+ chip_name = "topaz";
+ break;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ if (err) {
+ pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
+ }
+ return err;
+}
+
+/**
+ * gmc_v7_0_mc_load_microcode - load MC ucode into the hw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Load the GDDR MC ucode into the hw (CIK).
+ * Returns 0 on success, error on failure.
+ */
+static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct mc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data = NULL;
+ const __le32 *io_mc_regs = NULL;
+ u32 running;
+ int i, ucode_size, regs_size;
+
+ if (!adev->gmc.fw)
+ return -EINVAL;
+
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
+ amdgpu_ucode_print_mc_hdr(&hdr->header);
+
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
+ io_mc_regs = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ fw_data = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
+
+ if (running == 0) {
+ /* reset the engine and set to writable */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);
+
+ /* load mc io regs */
+ for (i = 0; i < regs_size; i++) {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(io_mc_regs++));
+ WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(io_mc_regs++));
+ }
+ /* load the MC ucode */
+ for (i = 0; i < ucode_size; i++)
+ WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(fw_data++));
+
+ /* put the engine back into the active state */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);
+
+ /* wait for training to complete */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (REG_GET_FIELD(RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL),
+ MC_SEQ_TRAIN_WAKEUP_CNTL, TRAIN_DONE_D0))
+ break;
+ udelay(1);
+ }
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (REG_GET_FIELD(RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL),
+ MC_SEQ_TRAIN_WAKEUP_CNTL, TRAIN_DONE_D1))
+ break;
+ udelay(1);
+ }
+ }
+
+ return 0;
+}
+
+static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+
+ base <<= 24;
+
+ amdgpu_gmc_vram_location(adev, mc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+}
+
+/**
+ * gmc_v7_0_mc_program - program the GPU memory controller
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set the location of vram, gart, and AGP in the GPU's
+ * physical address space (CIK).
+ */
+static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i, j;
+
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x6) {
+ WREG32((0xb05 + j), 0x00000000);
+ WREG32((0xb06 + j), 0x00000000);
+ WREG32((0xb07 + j), 0x00000000);
+ WREG32((0xb08 + j), 0x00000000);
+ WREG32((0xb09 + j), 0x00000000);
+ }
+ WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+ if (gmc_v7_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+
+ if (adev->mode_info.num_crtc) {
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+ }
+ /* Update configuration */
+ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+ adev->mem_scratch.gpu_addr >> 12);
+ WREG32(mmMC_VM_AGP_BASE, 0);
+ WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+ if (gmc_v7_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+
+ WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+
+ tmp = RREG32(mmHDP_MISC_CNTL);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
+ WREG32(mmHDP_MISC_CNTL, tmp);
+
+ tmp = RREG32(mmHDP_HOST_PATH_CNTL);
+ WREG32(mmHDP_HOST_PATH_CNTL, tmp);
+}
+
+/**
+ * gmc_v7_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space (CIK).
+ * Returns 0 for success.
+ */
+static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
+ u32 tmp;
+ int chansize, numchan;
+
+ /* Get VRAM informations */
+ tmp = RREG32(mmMC_ARB_RAMCFG);
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
+ chansize = 64;
+ else
+ chansize = 32;
+
+ tmp = RREG32(mmMC_SHARED_CHMAP);
+ switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+ case 0:
+ default:
+ numchan = 1;
+ break;
+ case 1:
+ numchan = 2;
+ break;
+ case 2:
+ numchan = 4;
+ break;
+ case 3:
+ numchan = 8;
+ break;
+ case 4:
+ numchan = 3;
+ break;
+ case 5:
+ numchan = 6;
+ break;
+ case 6:
+ numchan = 10;
+ break;
+ case 7:
+ numchan = 12;
+ break;
+ case 8:
+ numchan = 16;
+ break;
+ }
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ /* size in MB on si */
+ adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size &&
+ !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ: /* no MM engines */
+ default:
+ adev->gmc.gart_size = 256ULL << 20;
+ break;
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
+ case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */
+ case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
+ case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
+ case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+#endif
+ }
+ } else {
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
+ gmc_v7_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: type of flush
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+/**
+ * gmc_v7_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: type of flush
+ * *
+ * Flush the TLB for the requested page table (CIK).
+ */
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ /* bits 0-15 are the VM contexts0-15 */
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
+
+static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
+}
+
+static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
+/**
+ * gmc_v7_0_set_fault_enable_default - update VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+}
+
+/**
+ * gmc_v7_0_set_prt - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
+static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t tmp;
+
+ if (enable && !adev->gmc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->gmc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ MASK_PDE0_FAULT, enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
+/**
+ * gmc_v7_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This sets up the TLBs, programs the page tables for VMID0,
+ * sets up the hw for VMIDs 1-15 which are allocated on
+ * demand, and sets up the global locations for the LDS, GDS,
+ * and GPUVM for FSA64 clients (CIK).
+ * Returns 0 for success, errors for failure.
+ */
+static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
+{
+ uint64_t table_addr;
+ u32 tmp, field;
+ int i;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
+ /* Setup TLB control */
+ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ /* Setup L2 cache */
+ tmp = RREG32(mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ WREG32(mmVM_L2_CNTL, tmp);
+ tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32(mmVM_L2_CNTL2, tmp);
+
+ field = adev->vm_manager.fragment_size;
+ tmp = RREG32(mmVM_L2_CNTL3);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
+ WREG32(mmVM_L2_CNTL3, tmp);
+ /* setup context0 */
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT0_CNTL2, 0);
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+
+ WREG32(0x575, 0);
+ WREG32(0x576, 0);
+ WREG32(0x577, 0);
+
+ /* empty context1-15 */
+ /* FIXME start with 4G, once using 2 level pt switch to full
+ * vm size space
+ */
+ /* set vm size, must be a multiple of 4 */
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1);
+ for (i = 1; i < AMDGPU_NUM_VMID; i++) {
+ if (i < 8)
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
+ table_addr >> 12);
+ else
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
+ table_addr >> 12);
+ }
+
+ /* enable context1-15 */
+ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT1_CNTL2, 4);
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ gmc_v7_0_set_fault_enable_default(adev, false);
+ else
+ gmc_v7_0_set_fault_enable_default(adev, true);
+
+ if (adev->asic_type == CHIP_KAVERI) {
+ tmp = RREG32(mmCHUB_CONTROL);
+ tmp &= ~BYPASS_VM;
+ WREG32(mmCHUB_CONTROL, tmp);
+ }
+
+ gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)table_addr);
+ return 0;
+}
+
+static int gmc_v7_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "R600 PCIE GART already initialized\n");
+ return 0;
+ }
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = 0;
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+/**
+ * gmc_v7_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table (CIK).
+ */
+static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* Disable all tables */
+ WREG32(mmVM_CONTEXT0_CNTL, 0);
+ WREG32(mmVM_CONTEXT1_CNTL, 0);
+ /* Setup TLB control */
+ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ /* Setup L2 cache */
+ tmp = RREG32(mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32(mmVM_L2_CNTL, tmp);
+ WREG32(mmVM_L2_CNTL2, 0);
+}
+
+/**
+ * gmc_v7_0_vm_decode_fault - print human readable fault info
+ *
+ * @adev: amdgpu_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
+ * @pasid: debug logging only - no functional use
+ *
+ * Print human readable fault information (CIK).
+ */
+static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+ u32 addr, u32 mc_client, unsigned int pasid)
+{
+ u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+ u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+ char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
+ (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+ u32 mc_id;
+
+ mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ protections, vmid, pasid, addr,
+ REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_RW) ?
+ "write" : "read", block, mc_client, mc_id);
+}
+
+
+static const u32 mc_cg_registers[] = {
+ mmMC_HUB_MISC_HUB_CG,
+ mmMC_HUB_MISC_SIP_CG,
+ mmMC_HUB_MISC_VM_CG,
+ mmMC_XPB_CLK_GAT,
+ mmATC_MISC_CG,
+ mmMC_CITF_MISC_WR_CG,
+ mmMC_CITF_MISC_RD_CG,
+ mmMC_CITF_MISC_VM_CG,
+ mmVM_L2_CG,
+};
+
+static const u32 mc_cg_ls_en[] = {
+ MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK,
+ MC_HUB_MISC_SIP_CG__MEM_LS_ENABLE_MASK,
+ MC_HUB_MISC_VM_CG__MEM_LS_ENABLE_MASK,
+ MC_XPB_CLK_GAT__MEM_LS_ENABLE_MASK,
+ ATC_MISC_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_WR_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_RD_CG__MEM_LS_ENABLE_MASK,
+ MC_CITF_MISC_VM_CG__MEM_LS_ENABLE_MASK,
+ VM_L2_CG__MEM_LS_ENABLE_MASK,
+};
+
+static const u32 mc_cg_en[] = {
+ MC_HUB_MISC_HUB_CG__ENABLE_MASK,
+ MC_HUB_MISC_SIP_CG__ENABLE_MASK,
+ MC_HUB_MISC_VM_CG__ENABLE_MASK,
+ MC_XPB_CLK_GAT__ENABLE_MASK,
+ ATC_MISC_CG__ENABLE_MASK,
+ MC_CITF_MISC_WR_CG__ENABLE_MASK,
+ MC_CITF_MISC_RD_CG__ENABLE_MASK,
+ MC_CITF_MISC_VM_CG__ENABLE_MASK,
+ VM_L2_CG__ENABLE_MASK,
+};
+
+static void gmc_v7_0_enable_mc_ls(struct amdgpu_device *adev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ data |= mc_cg_ls_en[i];
+ else
+ data &= ~mc_cg_ls_en[i];
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void gmc_v7_0_enable_mc_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ int i;
+ u32 orig, data;
+
+ for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+ orig = data = RREG32(mc_cg_registers[i]);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= mc_cg_en[i];
+ else
+ data &= ~mc_cg_en[i];
+ if (data != orig)
+ WREG32(mc_cg_registers[i], data);
+ }
+}
+
+static void gmc_v7_0_enable_bif_mgls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_AGGRESSIVE_LS_EN, 1);
+ } else {
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 0);
+ data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_AGGRESSIVE_LS_EN, 0);
+ }
+
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+}
+
+static void gmc_v7_0_enable_hdp_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
+ else
+ data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
+
+ if (orig != data)
+ WREG32(mmHDP_HOST_PATH_CNTL, data);
+}
+
+static void gmc_v7_0_enable_hdp_ls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ orig = data = RREG32(mmHDP_MEM_POWER_LS);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
+ else
+ data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
+
+ if (orig != data)
+ WREG32(mmHDP_MEM_POWER_LS, data);
+}
+
+static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type)
+{
+ switch (mc_seq_vram_type) {
+ case MC_SEQ_MISC0__MT__GDDR1:
+ return AMDGPU_VRAM_TYPE_GDDR1;
+ case MC_SEQ_MISC0__MT__DDR2:
+ return AMDGPU_VRAM_TYPE_DDR2;
+ case MC_SEQ_MISC0__MT__GDDR3:
+ return AMDGPU_VRAM_TYPE_GDDR3;
+ case MC_SEQ_MISC0__MT__GDDR4:
+ return AMDGPU_VRAM_TYPE_GDDR4;
+ case MC_SEQ_MISC0__MT__GDDR5:
+ return AMDGPU_VRAM_TYPE_GDDR5;
+ case MC_SEQ_MISC0__MT__HBM:
+ return AMDGPU_VRAM_TYPE_HBM;
+ case MC_SEQ_MISC0__MT__DDR3:
+ return AMDGPU_VRAM_TYPE_DDR3;
+ default:
+ return AMDGPU_VRAM_TYPE_UNKNOWN;
+ }
+}
+
+static int gmc_v7_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v7_0_set_gmc_funcs(adev);
+ gmc_v7_0_set_irq_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start =
+ adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v7_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+ else
+ return 0;
+}
+
+static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+
+ return size;
+}
+
+static int gmc_v7_0_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ } else {
+ u32 tmp = RREG32(mmMC_SEQ_MISC0);
+
+ tmp &= MC_SEQ_MISC0__MT__MASK;
+ adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+ }
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ /* Adjust VM size here.
+ * Currently set to 4GB ((1 << 20) 4k pages).
+ * Max GPUVM size for cayman and SI is 40 bits.
+ */
+ amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
+
+ /* Set the internal MC address mask
+ * This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
+ if (r) {
+ pr_warn("No suitable DMA available\n");
+ return r;
+ }
+ adev->need_swiotlb = drm_need_swiotlb(40);
+
+ r = gmc_v7_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load mc firmware!\n");
+ return r;
+ }
+
+ r = gmc_v7_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v7_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = 8;
+ amdgpu_vm_manager_init(adev);
+
+ /* base offset of vram pages */
+ if (adev->flags & AMD_IS_APU) {
+ u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+ tmp <<= 22;
+ adev->vm_manager.vram_base_offset = tmp;
+ } else {
+ adev->vm_manager.vram_base_offset = 0;
+ }
+
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
+ return 0;
+}
+
+static int gmc_v7_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_gem_force_release(adev);
+ amdgpu_vm_manager_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
+ amdgpu_gart_table_vram_free(adev);
+ amdgpu_bo_fini(adev);
+ amdgpu_ucode_release(&adev->gmc.fw);
+
+ return 0;
+}
+
+static int gmc_v7_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v7_0_init_golden_registers(adev);
+
+ gmc_v7_0_mc_program(adev);
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = gmc_v7_0_mc_load_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load MC firmware!\n");
+ return r;
+ }
+ }
+
+ r = gmc_v7_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+ else
+ return r;
+}
+
+static int gmc_v7_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ gmc_v7_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v7_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v7_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v7_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v7_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v7_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK | SRBM_STATUS__VMC_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int gmc_v7_0_wait_for_idle(void *handle)
+{
+ unsigned int i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
+ SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK |
+ SRBM_STATUS__MCD_BUSY_MASK |
+ SRBM_STATUS__VMC_BUSY_MASK);
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+
+}
+
+static int gmc_v7_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) {
+ if (!(adev->flags & AMD_IS_APU))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
+ }
+
+ if (srbm_soft_reset) {
+ gmc_v7_0_mc_stop(adev);
+ if (gmc_v7_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
+ gmc_v7_0_mc_resume(adev);
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int gmc_v7_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+ u32 bits = (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* system context */
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ /* VMs */
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* system context */
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ /* VMs */
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 addr, status, mc_client, vmid;
+
+ addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
+ status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+ mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
+ /* reset addr and status */
+ WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
+
+ if (!addr && !status)
+ return 0;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
+ gmc_v7_0_set_fault_enable_default(adev, false);
+
+ if (printk_ratelimit()) {
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ addr);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ status);
+ gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client,
+ entry->pasid);
+ }
+
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
+ return 0;
+}
+
+static int gmc_v7_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ gmc_v7_0_enable_mc_mgcg(adev, gate);
+ gmc_v7_0_enable_mc_ls(adev, gate);
+ }
+ gmc_v7_0_enable_bif_mgls(adev, gate);
+ gmc_v7_0_enable_hdp_mgcg(adev, gate);
+ gmc_v7_0_enable_hdp_ls(adev, gate);
+
+ return 0;
+}
+
+static int gmc_v7_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
+ .name = "gmc_v7_0",
+ .early_init = gmc_v7_0_early_init,
+ .late_init = gmc_v7_0_late_init,
+ .sw_init = gmc_v7_0_sw_init,
+ .sw_fini = gmc_v7_0_sw_fini,
+ .hw_init = gmc_v7_0_hw_init,
+ .hw_fini = gmc_v7_0_hw_fini,
+ .suspend = gmc_v7_0_suspend,
+ .resume = gmc_v7_0_resume,
+ .is_idle = gmc_v7_0_is_idle,
+ .wait_for_idle = gmc_v7_0_wait_for_idle,
+ .soft_reset = gmc_v7_0_soft_reset,
+ .set_clockgating_state = gmc_v7_0_set_clockgating_state,
+ .set_powergating_state = gmc_v7_0_set_powergating_state,
+};
+
+static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
+ .set_prt = gmc_v7_0_set_prt,
+ .get_vm_pde = gmc_v7_0_get_vm_pde,
+ .get_vm_pte = gmc_v7_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v7_0_get_vbios_fb_size,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
+ .set = gmc_v7_0_vm_fault_interrupt_state,
+ .process = gmc_v7_0_process_interrupt,
+};
+
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
+}
+
+static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version gmc_v7_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v7_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 7,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &gmc_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
new file mode 100644
index 000000000..ebce2966c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V7_0_H__
+#define __GMC_V7_0_H__
+
+extern const struct amdgpu_ip_block_version gmc_v7_0_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v7_4_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
new file mode 100644
index 000000000..5af235202
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -0,0 +1,1766 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+#include "amdgpu.h"
+#include "gmc_v8_0.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
+#include "vid.h"
+#include "vi.h"
+
+#include "amdgpu_atombios.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
+static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v8_0_wait_for_idle(void *handle);
+
+MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_32_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
+
+static const u32 golden_settings_tonga_a11[] = {
+ mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
+ mmMC_HUB_RDREQ_DMIF_LIMIT, 0x0000007f, 0x00000028,
+ mmMC_HUB_WDP_UMC, 0x00007fb6, 0x00000991,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 tonga_mgcg_cgcg_init[] = {
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static const u32 golden_settings_fiji_a10[] = {
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] = {
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static const u32 golden_settings_polaris11_a11[] = {
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 golden_settings_polaris10_a11[] = {
+ mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff
+};
+
+static const u32 cz_mgcg_cgcg_init[] = {
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static const u32 stoney_mgcg_cgcg_init[] = {
+ mmATC_MISC_CG, 0xffffffff, 0x000c0200,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
+};
+
+static const u32 golden_settings_stoney_common[] = {
+ mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
+ mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
+};
+
+static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_fiji_a10,
+ ARRAY_SIZE(golden_settings_fiji_a10));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_tonga_a11,
+ ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris11_a11,
+ ARRAY_SIZE(golden_settings_polaris11_a11));
+ break;
+ case CHIP_POLARIS10:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris10_a11,
+ ARRAY_SIZE(golden_settings_polaris10_a11));
+ break;
+ case CHIP_CARRIZO:
+ amdgpu_device_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+ ARRAY_SIZE(cz_mgcg_cgcg_init));
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_program_register_sequence(adev,
+ stoney_mgcg_cgcg_init,
+ ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_stoney_common,
+ ARRAY_SIZE(golden_settings_stoney_common));
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
+{
+ u32 blackout;
+
+ gmc_v8_0_wait_for_idle(adev);
+
+ blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
+ /* Block CPU access */
+ WREG32(mmBIF_FB_EN, 0);
+ /* blackout the MC */
+ blackout = REG_SET_FIELD(blackout,
+ MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 1);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, blackout);
+ }
+ /* wait for the MC to settle */
+ udelay(100);
+}
+
+static void gmc_v8_0_mc_resume(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* unblackout the MC */
+ tmp = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+ WREG32(mmMC_SHARED_BLACKOUT_CNTL, tmp);
+ /* allow CPU access */
+ tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
+ tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
+ WREG32(mmBIF_FB_EN, tmp);
+}
+
+/**
+ * gmc_v8_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ chip_name = "tonga";
+ break;
+ case CHIP_POLARIS11:
+ if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision) ||
+ ASICID_IS_P31(adev->pdev->device, adev->pdev->revision))
+ chip_name = "polaris11_k";
+ else
+ chip_name = "polaris11";
+ break;
+ case CHIP_POLARIS10:
+ if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision))
+ chip_name = "polaris10_k";
+ else
+ chip_name = "polaris10";
+ break;
+ case CHIP_POLARIS12:
+ if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
+ chip_name = "polaris12_k";
+ } else {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, ixMC_IO_DEBUG_UP_159);
+ /* Polaris12 32bit ASIC needs a special MC firmware */
+ if (RREG32(mmMC_SEQ_IO_DEBUG_DATA) == 0x05b4dc40)
+ chip_name = "polaris12_32";
+ else
+ chip_name = "polaris12";
+ }
+ break;
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_VEGAM:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ if (err) {
+ pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
+ amdgpu_ucode_release(&adev->gmc.fw);
+ }
+ return err;
+}
+
+/**
+ * gmc_v8_0_tonga_mc_load_microcode - load tonga MC ucode into the hw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Load the GDDR MC ucode into the hw (VI).
+ * Returns 0 on success, error on failure.
+ */
+static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct mc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data = NULL;
+ const __le32 *io_mc_regs = NULL;
+ u32 running;
+ int i, ucode_size, regs_size;
+
+ /* Skip MC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ * Skip MC ucode loading on VF, because hypervisor will do that
+ * for this adaptor.
+ */
+ if (amdgpu_sriov_bios(adev))
+ return 0;
+
+ if (!adev->gmc.fw)
+ return -EINVAL;
+
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
+ amdgpu_ucode_print_mc_hdr(&hdr->header);
+
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
+ io_mc_regs = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ fw_data = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
+
+ if (running == 0) {
+ /* reset the engine and set to writable */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);
+
+ /* load mc io regs */
+ for (i = 0; i < regs_size; i++) {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(io_mc_regs++));
+ WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(io_mc_regs++));
+ }
+ /* load the MC ucode */
+ for (i = 0; i < ucode_size; i++)
+ WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(fw_data++));
+
+ /* put the engine back into the active state */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);
+
+ /* wait for training to complete */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (REG_GET_FIELD(RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL),
+ MC_SEQ_TRAIN_WAKEUP_CNTL, TRAIN_DONE_D0))
+ break;
+ udelay(1);
+ }
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (REG_GET_FIELD(RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL),
+ MC_SEQ_TRAIN_WAKEUP_CNTL, TRAIN_DONE_D1))
+ break;
+ udelay(1);
+ }
+ }
+
+ return 0;
+}
+
+static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct mc_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data = NULL;
+ const __le32 *io_mc_regs = NULL;
+ u32 data;
+ int i, ucode_size, regs_size;
+
+ /* Skip MC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ * Skip MC ucode loading on VF, because hypervisor will do that
+ * for this adaptor.
+ */
+ if (amdgpu_sriov_bios(adev))
+ return 0;
+
+ if (!adev->gmc.fw)
+ return -EINVAL;
+
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
+ amdgpu_ucode_print_mc_hdr(&hdr->header);
+
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
+ io_mc_regs = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ fw_data = (const __le32 *)
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ data = RREG32(mmMC_SEQ_MISC0);
+ data &= ~(0x40);
+ WREG32(mmMC_SEQ_MISC0, data);
+
+ /* load mc io regs */
+ for (i = 0; i < regs_size; i++) {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(io_mc_regs++));
+ WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(io_mc_regs++));
+ }
+
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);
+
+ /* load the MC ucode */
+ for (i = 0; i < ucode_size; i++)
+ WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(fw_data++));
+
+ /* put the engine back into the active state */
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);
+
+ /* wait for training to complete */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ data = RREG32(mmMC_SEQ_MISC0);
+ if (data & 0x80)
+ break;
+ udelay(1);
+ }
+
+ return 0;
+}
+
+static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+ base <<= 24;
+
+ amdgpu_gmc_vram_location(adev, mc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+}
+
+/**
+ * gmc_v8_0_mc_program - program the GPU memory controller
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set the location of vram, gart, and AGP in the GPU's
+ * physical address space (VI).
+ */
+static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i, j;
+
+ /* Initialize HDP */
+ for (i = 0, j = 0; i < 32; i++, j += 0x6) {
+ WREG32((0xb05 + j), 0x00000000);
+ WREG32((0xb06 + j), 0x00000000);
+ WREG32((0xb07 + j), 0x00000000);
+ WREG32((0xb08 + j), 0x00000000);
+ WREG32((0xb09 + j), 0x00000000);
+ }
+ WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+ if (gmc_v8_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+
+ if (adev->mode_info.num_crtc) {
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+ }
+ /* Update configuration */
+ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 12);
+ WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+ adev->mem_scratch.gpu_addr >> 12);
+
+ if (amdgpu_sriov_vf(adev)) {
+ tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
+ tmp |= ((adev->gmc.vram_start >> 24) & 0xFFFF);
+ WREG32(mmMC_VM_FB_LOCATION, tmp);
+ /* XXX double check these! */
+ WREG32(mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+ WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+ WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+ }
+
+ WREG32(mmMC_VM_AGP_BASE, 0);
+ WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+ if (gmc_v8_0_wait_for_idle((void *)adev))
+ dev_warn(adev->dev, "Wait for MC idle timedout !\n");
+
+ WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+
+ tmp = RREG32(mmHDP_MISC_CNTL);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
+ WREG32(mmHDP_MISC_CNTL, tmp);
+
+ tmp = RREG32(mmHDP_HOST_PATH_CNTL);
+ WREG32(mmHDP_HOST_PATH_CNTL, tmp);
+}
+
+/**
+ * gmc_v8_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space (VI).
+ * Returns 0 for success.
+ */
+static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 tmp;
+
+ adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* Get VRAM informations */
+ tmp = RREG32(mmMC_ARB_RAMCFG);
+ if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE))
+ chansize = 64;
+ else
+ chansize = 32;
+
+ tmp = RREG32(mmMC_SHARED_CHMAP);
+ switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+ case 0:
+ default:
+ numchan = 1;
+ break;
+ case 1:
+ numchan = 2;
+ break;
+ case 2:
+ numchan = 4;
+ break;
+ case 3:
+ numchan = 8;
+ break;
+ case 4:
+ numchan = 3;
+ break;
+ case 5:
+ numchan = 6;
+ break;
+ case 6:
+ numchan = 10;
+ break;
+ case 7:
+ numchan = 12;
+ break;
+ case 8:
+ numchan = 16;
+ break;
+ }
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ /* size in MB on si */
+ tmp = RREG32(mmCONFIG_MEMSIZE);
+ /* some boards may have garbage in the upper 16 bits */
+ if (tmp & 0xffff0000) {
+ DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ if (tmp & 0xffff)
+ tmp &= 0xffff;
+ }
+ adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10: /* all engines support GPUVM */
+ case CHIP_POLARIS11: /* all engines support GPUVM */
+ case CHIP_POLARIS12: /* all engines support GPUVM */
+ case CHIP_VEGAM: /* all engines support GPUVM */
+ default:
+ adev->gmc.gart_size = 256ULL << 20;
+ break;
+ case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
+ case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
+ case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
+ case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
+ gmc_v8_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: type of flush
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+/**
+ * gmc_v8_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: type of flush
+ *
+ * Flush the TLB for the requested page table (VI).
+ */
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ /* bits 0-15 are the VM contexts0-15 */
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
+static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
+
+/*
+ * PTE format on VI:
+ * 63:40 reserved
+ * 39:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 reserved
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on VI:
+ * 63:59 block fragment size
+ * 58:40 reserved
+ * 39:1 physical base address of PTE
+ * bits 5:1 must be 0.
+ * 0 valid
+ */
+
+static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
+}
+
+static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
+/**
+ * gmc_v8_0_set_fault_enable_default - update VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+}
+
+/**
+ * gmc_v8_0_set_prt() - set PRT VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable VM fault handling for PRT
+ */
+static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && !adev->gmc.prt_warning) {
+ dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
+ adev->gmc.prt_warning = true;
+ }
+
+ tmp = RREG32(mmVM_PRT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L2_CACHE_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ L1_TLB_STORE_INVALID_ENTRIES, enable);
+ tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL,
+ MASK_PDE0_FAULT, enable);
+ WREG32(mmVM_PRT_CNTL, tmp);
+
+ if (enable) {
+ uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high);
+ } else {
+ WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff);
+ WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0);
+ WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0);
+ }
+}
+
+/**
+ * gmc_v8_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This sets up the TLBs, programs the page tables for VMID0,
+ * sets up the hw for VMIDs 1-15 which are allocated on
+ * demand, and sets up the global locations for the LDS, GDS,
+ * and GPUVM for FSA64 clients (VI).
+ * Returns 0 for success, errors for failure.
+ */
+static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
+{
+ uint64_t table_addr;
+ u32 tmp, field;
+ int i;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
+ /* Setup TLB control */
+ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ /* Setup L2 cache */
+ tmp = RREG32(mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ WREG32(mmVM_L2_CNTL, tmp);
+ tmp = RREG32(mmVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32(mmVM_L2_CNTL2, tmp);
+
+ field = adev->vm_manager.fragment_size;
+ tmp = RREG32(mmVM_L2_CNTL3);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
+ WREG32(mmVM_L2_CNTL3, tmp);
+ /* XXX: set to enable PTE/PDE in system memory */
+ tmp = RREG32(mmVM_L2_CNTL4);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SHARED, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SHARED, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PDE_REQUEST_SHARED, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PDE_REQUEST_SNOOP, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SHARED, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0);
+ WREG32(mmVM_L2_CNTL4, tmp);
+ /* setup context0 */
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT0_CNTL2, 0);
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+
+ WREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR, 0);
+ WREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR, 0);
+ WREG32(mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET, 0);
+
+ /* empty context1-15 */
+ /* FIXME start with 4G, once using 2 level pt switch to full
+ * vm size space
+ */
+ /* set vm size, must be a multiple of 4 */
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+ WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1);
+ for (i = 1; i < AMDGPU_NUM_VMID; i++) {
+ if (i < 8)
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
+ table_addr >> 12);
+ else
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
+ table_addr >> 12);
+ }
+
+ /* enable context1-15 */
+ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32(mmVM_CONTEXT1_CNTL2, 4);
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ gmc_v8_0_set_fault_enable_default(adev, false);
+ else
+ gmc_v8_0_set_fault_enable_default(adev, true);
+
+ gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned int)(adev->gmc.gart_size >> 20),
+ (unsigned long long)table_addr);
+ return 0;
+}
+
+static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "R600 PCIE GART already initialized\n");
+ return 0;
+ }
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_EXECUTABLE;
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+/**
+ * gmc_v8_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table (VI).
+ */
+static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* Disable all tables */
+ WREG32(mmVM_CONTEXT0_CNTL, 0);
+ WREG32(mmVM_CONTEXT1_CNTL, 0);
+ /* Setup TLB control */
+ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32(mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ /* Setup L2 cache */
+ tmp = RREG32(mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32(mmVM_L2_CNTL, tmp);
+ WREG32(mmVM_L2_CNTL2, 0);
+}
+
+/**
+ * gmc_v8_0_vm_decode_fault - print human readable fault info
+ *
+ * @adev: amdgpu_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
+ * @pasid: debug logging only - no functional use
+ *
+ * Print human readable fault information (VI).
+ */
+static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+ u32 addr, u32 mc_client, unsigned int pasid)
+{
+ u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+ u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+ char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
+ (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+ u32 mc_id;
+
+ mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ protections, vmid, pasid, addr,
+ REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_RW) ?
+ "write" : "read", block, mc_client, mc_id);
+}
+
+static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
+{
+ switch (mc_seq_vram_type) {
+ case MC_SEQ_MISC0__MT__GDDR1:
+ return AMDGPU_VRAM_TYPE_GDDR1;
+ case MC_SEQ_MISC0__MT__DDR2:
+ return AMDGPU_VRAM_TYPE_DDR2;
+ case MC_SEQ_MISC0__MT__GDDR3:
+ return AMDGPU_VRAM_TYPE_GDDR3;
+ case MC_SEQ_MISC0__MT__GDDR4:
+ return AMDGPU_VRAM_TYPE_GDDR4;
+ case MC_SEQ_MISC0__MT__GDDR5:
+ return AMDGPU_VRAM_TYPE_GDDR5;
+ case MC_SEQ_MISC0__MT__HBM:
+ return AMDGPU_VRAM_TYPE_HBM;
+ case MC_SEQ_MISC0__MT__DDR3:
+ return AMDGPU_VRAM_TYPE_DDR3;
+ default:
+ return AMDGPU_VRAM_TYPE_UNKNOWN;
+ }
+}
+
+static int gmc_v8_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v8_0_set_gmc_funcs(adev);
+ gmc_v8_0_set_irq_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start =
+ adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v8_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+ else
+ return 0;
+}
+
+static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned int size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+
+ return size;
+}
+
+#define mmMC_SEQ_MISC0_FIJI 0xA71
+
+static int gmc_v8_0_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ } else {
+ u32 tmp;
+
+ if ((adev->asic_type == CHIP_FIJI) ||
+ (adev->asic_type == CHIP_VEGAM))
+ tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
+ else
+ tmp = RREG32(mmMC_SEQ_MISC0);
+ tmp &= MC_SEQ_MISC0__MT__MASK;
+ adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+ }
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ /* Adjust VM size here.
+ * Currently set to 4GB ((1 << 20) 4k pages).
+ * Max GPUVM size for cayman and SI is 40 bits.
+ */
+ amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
+
+ /* Set the internal MC address mask
+ * This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));
+ if (r) {
+ pr_warn("No suitable DMA available\n");
+ return r;
+ }
+ adev->need_swiotlb = drm_need_swiotlb(40);
+
+ r = gmc_v8_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load mc firmware!\n");
+ return r;
+ }
+
+ r = gmc_v8_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v8_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.first_kfd_vmid = 8;
+ amdgpu_vm_manager_init(adev);
+
+ /* base offset of vram pages */
+ if (adev->flags & AMD_IS_APU) {
+ u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
+
+ tmp <<= 22;
+ adev->vm_manager.vram_base_offset = tmp;
+ } else {
+ adev->vm_manager.vram_base_offset = 0;
+ }
+
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
+ return 0;
+}
+
+static int gmc_v8_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_gem_force_release(adev);
+ amdgpu_vm_manager_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
+ amdgpu_gart_table_vram_free(adev);
+ amdgpu_bo_fini(adev);
+ amdgpu_ucode_release(&adev->gmc.fw);
+
+ return 0;
+}
+
+static int gmc_v8_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v8_0_init_golden_registers(adev);
+
+ gmc_v8_0_mc_program(adev);
+
+ if (adev->asic_type == CHIP_TONGA) {
+ r = gmc_v8_0_tonga_mc_load_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load MC firmware!\n");
+ return r;
+ }
+ } else if (adev->asic_type == CHIP_POLARIS11 ||
+ adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS12) {
+ r = gmc_v8_0_polaris_mc_load_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load MC firmware!\n");
+ return r;
+ }
+ }
+
+ r = gmc_v8_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+ else
+ return r;
+}
+
+static int gmc_v8_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ gmc_v8_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v8_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v8_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v8_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v8_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v8_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK | SRBM_STATUS__VMC_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int gmc_v8_0_wait_for_idle(void *handle)
+{
+ unsigned int i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
+ SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK |
+ SRBM_STATUS__MCD_BUSY_MASK |
+ SRBM_STATUS__VMC_BUSY_MASK |
+ SRBM_STATUS__VMC1_BUSY_MASK);
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+
+}
+
+static bool gmc_v8_0_check_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
+
+ if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
+ SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) {
+ if (!(adev->flags & AMD_IS_APU))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
+ }
+
+ if (srbm_soft_reset) {
+ adev->gmc.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ }
+
+ adev->gmc.srbm_soft_reset = 0;
+
+ return false;
+}
+
+static int gmc_v8_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->gmc.srbm_soft_reset)
+ return 0;
+
+ gmc_v8_0_mc_stop(adev);
+ if (gmc_v8_0_wait_for_idle(adev))
+ dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
+
+ return 0;
+}
+
+static int gmc_v8_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->gmc.srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->gmc.srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int gmc_v8_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->gmc.srbm_soft_reset)
+ return 0;
+
+ gmc_v8_0_mc_resume(adev);
+ return 0;
+}
+
+static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+ u32 bits = (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* system context */
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ /* VMs */
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp &= ~bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* system context */
+ tmp = RREG32(mmVM_CONTEXT0_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT0_CNTL, tmp);
+ /* VMs */
+ tmp = RREG32(mmVM_CONTEXT1_CNTL);
+ tmp |= bits;
+ WREG32(mmVM_CONTEXT1_CNTL, tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 addr, status, mc_client, vmid;
+
+ if (amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+ dev_err(adev->dev, " Can't decode VM fault info here on SRIOV VF\n");
+ return 0;
+ }
+
+ addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
+ status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+ mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
+ /* reset addr and status */
+ WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);
+
+ if (!addr && !status)
+ return 0;
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
+ gmc_v8_0_set_fault_enable_default(adev, false);
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
+ entry->src_id, entry->src_data[0], task_info.process_name,
+ task_info.tgid, task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ addr);
+ dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ status);
+ gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
+ entry->pasid);
+ }
+
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
+ return 0;
+}
+
+static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data |= MC_HUB_MISC_HUB_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_SIP_CG);
+ data |= MC_HUB_MISC_SIP_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_SIP_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_VM_CG);
+ data |= MC_HUB_MISC_VM_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_VM_CG, data);
+
+ data = RREG32(mmMC_XPB_CLK_GAT);
+ data |= MC_XPB_CLK_GAT__ENABLE_MASK;
+ WREG32(mmMC_XPB_CLK_GAT, data);
+
+ data = RREG32(mmATC_MISC_CG);
+ data |= ATC_MISC_CG__ENABLE_MASK;
+ WREG32(mmATC_MISC_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_WR_CG);
+ data |= MC_CITF_MISC_WR_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_WR_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_RD_CG);
+ data |= MC_CITF_MISC_RD_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_RD_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_VM_CG);
+ data |= MC_CITF_MISC_VM_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_VM_CG, data);
+
+ data = RREG32(mmVM_L2_CG);
+ data |= VM_L2_CG__ENABLE_MASK;
+ WREG32(mmVM_L2_CG, data);
+ } else {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data &= ~MC_HUB_MISC_HUB_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_SIP_CG);
+ data &= ~MC_HUB_MISC_SIP_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_SIP_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_VM_CG);
+ data &= ~MC_HUB_MISC_VM_CG__ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_VM_CG, data);
+
+ data = RREG32(mmMC_XPB_CLK_GAT);
+ data &= ~MC_XPB_CLK_GAT__ENABLE_MASK;
+ WREG32(mmMC_XPB_CLK_GAT, data);
+
+ data = RREG32(mmATC_MISC_CG);
+ data &= ~ATC_MISC_CG__ENABLE_MASK;
+ WREG32(mmATC_MISC_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_WR_CG);
+ data &= ~MC_CITF_MISC_WR_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_WR_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_RD_CG);
+ data &= ~MC_CITF_MISC_RD_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_RD_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_VM_CG);
+ data &= ~MC_CITF_MISC_VM_CG__ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_VM_CG, data);
+
+ data = RREG32(mmVM_L2_CG);
+ data &= ~VM_L2_CG__ENABLE_MASK;
+ WREG32(mmVM_L2_CG, data);
+ }
+}
+
+static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data |= MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_SIP_CG);
+ data |= MC_HUB_MISC_SIP_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_SIP_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_VM_CG);
+ data |= MC_HUB_MISC_VM_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_VM_CG, data);
+
+ data = RREG32(mmMC_XPB_CLK_GAT);
+ data |= MC_XPB_CLK_GAT__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_XPB_CLK_GAT, data);
+
+ data = RREG32(mmATC_MISC_CG);
+ data |= ATC_MISC_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmATC_MISC_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_WR_CG);
+ data |= MC_CITF_MISC_WR_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_WR_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_RD_CG);
+ data |= MC_CITF_MISC_RD_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_RD_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_VM_CG);
+ data |= MC_CITF_MISC_VM_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_VM_CG, data);
+
+ data = RREG32(mmVM_L2_CG);
+ data |= VM_L2_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmVM_L2_CG, data);
+ } else {
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ data &= ~MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_HUB_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_SIP_CG);
+ data &= ~MC_HUB_MISC_SIP_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_SIP_CG, data);
+
+ data = RREG32(mmMC_HUB_MISC_VM_CG);
+ data &= ~MC_HUB_MISC_VM_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_HUB_MISC_VM_CG, data);
+
+ data = RREG32(mmMC_XPB_CLK_GAT);
+ data &= ~MC_XPB_CLK_GAT__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_XPB_CLK_GAT, data);
+
+ data = RREG32(mmATC_MISC_CG);
+ data &= ~ATC_MISC_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmATC_MISC_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_WR_CG);
+ data &= ~MC_CITF_MISC_WR_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_WR_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_RD_CG);
+ data &= ~MC_CITF_MISC_RD_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_RD_CG, data);
+
+ data = RREG32(mmMC_CITF_MISC_VM_CG);
+ data &= ~MC_CITF_MISC_VM_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmMC_CITF_MISC_VM_CG, data);
+
+ data = RREG32(mmVM_L2_CG);
+ data &= ~VM_L2_CG__MEM_LS_ENABLE_MASK;
+ WREG32(mmVM_L2_CG, data);
+ }
+}
+
+static int gmc_v8_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ fiji_update_mc_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ fiji_update_mc_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gmc_v8_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ data = RREG32(mmMC_HUB_MISC_HUB_CG);
+ if (data & MC_HUB_MISC_HUB_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
+ .name = "gmc_v8_0",
+ .early_init = gmc_v8_0_early_init,
+ .late_init = gmc_v8_0_late_init,
+ .sw_init = gmc_v8_0_sw_init,
+ .sw_fini = gmc_v8_0_sw_fini,
+ .hw_init = gmc_v8_0_hw_init,
+ .hw_fini = gmc_v8_0_hw_fini,
+ .suspend = gmc_v8_0_suspend,
+ .resume = gmc_v8_0_resume,
+ .is_idle = gmc_v8_0_is_idle,
+ .wait_for_idle = gmc_v8_0_wait_for_idle,
+ .check_soft_reset = gmc_v8_0_check_soft_reset,
+ .pre_soft_reset = gmc_v8_0_pre_soft_reset,
+ .soft_reset = gmc_v8_0_soft_reset,
+ .post_soft_reset = gmc_v8_0_post_soft_reset,
+ .set_clockgating_state = gmc_v8_0_set_clockgating_state,
+ .set_powergating_state = gmc_v8_0_set_powergating_state,
+ .get_clockgating_state = gmc_v8_0_get_clockgating_state,
+};
+
+static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
+ .set_prt = gmc_v8_0_set_prt,
+ .get_vm_pde = gmc_v8_0_get_vm_pde,
+ .get_vm_pte = gmc_v8_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v8_0_get_vbios_fb_size,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
+ .set = gmc_v8_0_vm_fault_interrupt_state,
+ .process = gmc_v8_0_process_interrupt,
+};
+
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
+}
+
+static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version gmc_v8_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 8,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v8_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 8,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &gmc_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v8_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 8,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &gmc_v8_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
new file mode 100644
index 000000000..19b8a8aed
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V8_0_H__
+#define __GMC_V8_0_H__
+
+extern const struct amdgpu_ip_block_version gmc_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v8_1_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v8_5_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
new file mode 100644
index 000000000..89550d3df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -0,0 +1,2509 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "gmc_v9_0.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_gem.h"
+
+#include "gc/gc_9_0_sh_mask.h"
+#include "dce/dce_12_0_offset.h"
+#include "dce/dce_12_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "oss/osssys_4_0_offset.h"
+
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "umc/umc_6_0_sh_mask.h"
+
+#include "gfxhub_v1_0.h"
+#include "mmhub_v1_0.h"
+#include "athub_v1_0.h"
+#include "gfxhub_v1_1.h"
+#include "gfxhub_v1_2.h"
+#include "mmhub_v9_4.h"
+#include "mmhub_v1_7.h"
+#include "mmhub_v1_8.h"
+#include "umc_v6_1.h"
+#include "umc_v6_0.h"
+#include "umc_v6_7.h"
+#include "hdp_v4_0.h"
+#include "mca_v3_0.h"
+
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+
+#include "amdgpu_reset.h"
+
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
+
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
+
+#define MAX_MEM_RANGES 8
+
+static const char * const gfxhub_client_ids[] = {
+ "CB",
+ "DB",
+ "IA",
+ "WD",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "PA",
+};
+
+static const char *mmhub_client_ids_raven[][2] = {
+ [0][0] = "MP1",
+ [1][0] = "MP0",
+ [2][0] = "VCN",
+ [3][0] = "VCNU",
+ [4][0] = "HDP",
+ [5][0] = "DCE",
+ [13][0] = "UTCL2",
+ [19][0] = "TLS",
+ [26][0] = "OSS",
+ [27][0] = "SDMA0",
+ [0][1] = "MP1",
+ [1][1] = "MP0",
+ [2][1] = "VCN",
+ [3][1] = "VCNU",
+ [4][1] = "HDP",
+ [5][1] = "XDP",
+ [6][1] = "DBGU0",
+ [7][1] = "DCE",
+ [8][1] = "DCEDWB0",
+ [9][1] = "DCEDWB1",
+ [26][1] = "OSS",
+ [27][1] = "SDMA0",
+};
+
+static const char *mmhub_client_ids_renoir[][2] = {
+ [0][0] = "MP1",
+ [1][0] = "MP0",
+ [2][0] = "HDP",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [13][0] = "UTCL2",
+ [19][0] = "TLS",
+ [26][0] = "OSS",
+ [27][0] = "SDMA0",
+ [28][0] = "VCN",
+ [29][0] = "VCNU",
+ [30][0] = "JPEG",
+ [0][1] = "MP1",
+ [1][1] = "MP0",
+ [2][1] = "HDP",
+ [3][1] = "XDP",
+ [6][1] = "DBGU0",
+ [7][1] = "DCEDMC",
+ [8][1] = "DCEVGA",
+ [9][1] = "DCEDWB",
+ [26][1] = "OSS",
+ [27][1] = "SDMA0",
+ [28][1] = "VCN",
+ [29][1] = "VCNU",
+ [30][1] = "JPEG",
+};
+
+static const char *mmhub_client_ids_vega10[][2] = {
+ [0][0] = "MP0",
+ [1][0] = "UVD",
+ [2][0] = "UVDU",
+ [3][0] = "HDP",
+ [13][0] = "UTCL2",
+ [14][0] = "OSS",
+ [15][0] = "SDMA1",
+ [32+0][0] = "VCE0",
+ [32+1][0] = "VCE0U",
+ [32+2][0] = "XDMA",
+ [32+3][0] = "DCE",
+ [32+4][0] = "MP1",
+ [32+14][0] = "SDMA0",
+ [0][1] = "MP0",
+ [1][1] = "UVD",
+ [2][1] = "UVDU",
+ [3][1] = "DBGU0",
+ [4][1] = "HDP",
+ [5][1] = "XDP",
+ [14][1] = "OSS",
+ [15][1] = "SDMA0",
+ [32+0][1] = "VCE0",
+ [32+1][1] = "VCE0U",
+ [32+2][1] = "XDMA",
+ [32+3][1] = "DCE",
+ [32+4][1] = "DCEDWB",
+ [32+5][1] = "MP1",
+ [32+6][1] = "DBGU1",
+ [32+14][1] = "SDMA1",
+};
+
+static const char *mmhub_client_ids_vega12[][2] = {
+ [0][0] = "MP0",
+ [1][0] = "VCE0",
+ [2][0] = "VCE0U",
+ [3][0] = "HDP",
+ [13][0] = "UTCL2",
+ [14][0] = "OSS",
+ [15][0] = "SDMA1",
+ [32+0][0] = "DCE",
+ [32+1][0] = "XDMA",
+ [32+2][0] = "UVD",
+ [32+3][0] = "UVDU",
+ [32+4][0] = "MP1",
+ [32+15][0] = "SDMA0",
+ [0][1] = "MP0",
+ [1][1] = "VCE0",
+ [2][1] = "VCE0U",
+ [3][1] = "DBGU0",
+ [4][1] = "HDP",
+ [5][1] = "XDP",
+ [14][1] = "OSS",
+ [15][1] = "SDMA0",
+ [32+0][1] = "DCE",
+ [32+1][1] = "DCEDWB",
+ [32+2][1] = "XDMA",
+ [32+3][1] = "UVD",
+ [32+4][1] = "UVDU",
+ [32+5][1] = "MP1",
+ [32+6][1] = "DBGU1",
+ [32+15][1] = "SDMA1",
+};
+
+static const char *mmhub_client_ids_vega20[][2] = {
+ [0][0] = "XDMA",
+ [1][0] = "DCE",
+ [2][0] = "VCE0",
+ [3][0] = "VCE0U",
+ [4][0] = "UVD",
+ [5][0] = "UVD1U",
+ [13][0] = "OSS",
+ [14][0] = "HDP",
+ [15][0] = "SDMA0",
+ [32+0][0] = "UVD",
+ [32+1][0] = "UVDU",
+ [32+2][0] = "MP1",
+ [32+3][0] = "MP0",
+ [32+12][0] = "UTCL2",
+ [32+14][0] = "SDMA1",
+ [0][1] = "XDMA",
+ [1][1] = "DCE",
+ [2][1] = "DCEDWB",
+ [3][1] = "VCE0",
+ [4][1] = "VCE0U",
+ [5][1] = "UVD1",
+ [6][1] = "UVD1U",
+ [7][1] = "DBGU0",
+ [8][1] = "XDP",
+ [13][1] = "OSS",
+ [14][1] = "HDP",
+ [15][1] = "SDMA0",
+ [32+0][1] = "UVD",
+ [32+1][1] = "UVDU",
+ [32+2][1] = "DBGU1",
+ [32+3][1] = "MP1",
+ [32+4][1] = "MP0",
+ [32+14][1] = "SDMA1",
+};
+
+static const char *mmhub_client_ids_arcturus[][2] = {
+ [0][0] = "DBGU1",
+ [1][0] = "XDP",
+ [2][0] = "MP1",
+ [14][0] = "HDP",
+ [171][0] = "JPEG",
+ [172][0] = "VCN",
+ [173][0] = "VCNU",
+ [203][0] = "JPEG1",
+ [204][0] = "VCN1",
+ [205][0] = "VCN1U",
+ [256][0] = "SDMA0",
+ [257][0] = "SDMA1",
+ [258][0] = "SDMA2",
+ [259][0] = "SDMA3",
+ [260][0] = "SDMA4",
+ [261][0] = "SDMA5",
+ [262][0] = "SDMA6",
+ [263][0] = "SDMA7",
+ [384][0] = "OSS",
+ [0][1] = "DBGU1",
+ [1][1] = "XDP",
+ [2][1] = "MP1",
+ [14][1] = "HDP",
+ [171][1] = "JPEG",
+ [172][1] = "VCN",
+ [173][1] = "VCNU",
+ [203][1] = "JPEG1",
+ [204][1] = "VCN1",
+ [205][1] = "VCN1U",
+ [256][1] = "SDMA0",
+ [257][1] = "SDMA1",
+ [258][1] = "SDMA2",
+ [259][1] = "SDMA3",
+ [260][1] = "SDMA4",
+ [261][1] = "SDMA5",
+ [262][1] = "SDMA6",
+ [263][1] = "SDMA7",
+ [384][1] = "OSS",
+};
+
+static const char *mmhub_client_ids_aldebaran[][2] = {
+ [2][0] = "MP1",
+ [3][0] = "MP0",
+ [32+1][0] = "DBGU_IO0",
+ [32+2][0] = "DBGU_IO2",
+ [32+4][0] = "MPIO",
+ [96+11][0] = "JPEG0",
+ [96+12][0] = "VCN0",
+ [96+13][0] = "VCNU0",
+ [128+11][0] = "JPEG1",
+ [128+12][0] = "VCN1",
+ [128+13][0] = "VCNU1",
+ [160+1][0] = "XDP",
+ [160+14][0] = "HDP",
+ [256+0][0] = "SDMA0",
+ [256+1][0] = "SDMA1",
+ [256+2][0] = "SDMA2",
+ [256+3][0] = "SDMA3",
+ [256+4][0] = "SDMA4",
+ [384+0][0] = "OSS",
+ [2][1] = "MP1",
+ [3][1] = "MP0",
+ [32+1][1] = "DBGU_IO0",
+ [32+2][1] = "DBGU_IO2",
+ [32+4][1] = "MPIO",
+ [96+11][1] = "JPEG0",
+ [96+12][1] = "VCN0",
+ [96+13][1] = "VCNU0",
+ [128+11][1] = "JPEG1",
+ [128+12][1] = "VCN1",
+ [128+13][1] = "VCNU1",
+ [160+1][1] = "XDP",
+ [160+14][1] = "HDP",
+ [256+0][1] = "SDMA0",
+ [256+1][1] = "SDMA1",
+ [256+2][1] = "SDMA2",
+ [256+3][1] = "SDMA3",
+ [256+4][1] = "SDMA4",
+ [384+0][1] = "OSS",
+};
+
+static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = {
+ SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
+ SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
+};
+
+static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = {
+ SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
+};
+
+static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
+ (0x000143c0 + 0x00000000),
+ (0x000143c0 + 0x00000800),
+ (0x000143c0 + 0x00001000),
+ (0x000143c0 + 0x00001800),
+ (0x000543c0 + 0x00000000),
+ (0x000543c0 + 0x00000800),
+ (0x000543c0 + 0x00001000),
+ (0x000543c0 + 0x00001800),
+ (0x000943c0 + 0x00000000),
+ (0x000943c0 + 0x00000800),
+ (0x000943c0 + 0x00001000),
+ (0x000943c0 + 0x00001800),
+ (0x000d43c0 + 0x00000000),
+ (0x000d43c0 + 0x00000800),
+ (0x000d43c0 + 0x00001000),
+ (0x000d43c0 + 0x00001800),
+ (0x001143c0 + 0x00000000),
+ (0x001143c0 + 0x00000800),
+ (0x001143c0 + 0x00001000),
+ (0x001143c0 + 0x00001800),
+ (0x001543c0 + 0x00000000),
+ (0x001543c0 + 0x00000800),
+ (0x001543c0 + 0x00001000),
+ (0x001543c0 + 0x00001800),
+ (0x001943c0 + 0x00000000),
+ (0x001943c0 + 0x00000800),
+ (0x001943c0 + 0x00001000),
+ (0x001943c0 + 0x00001800),
+ (0x001d43c0 + 0x00000000),
+ (0x001d43c0 + 0x00000800),
+ (0x001d43c0 + 0x00001000),
+ (0x001d43c0 + 0x00001800),
+};
+
+static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
+ (0x000143e0 + 0x00000000),
+ (0x000143e0 + 0x00000800),
+ (0x000143e0 + 0x00001000),
+ (0x000143e0 + 0x00001800),
+ (0x000543e0 + 0x00000000),
+ (0x000543e0 + 0x00000800),
+ (0x000543e0 + 0x00001000),
+ (0x000543e0 + 0x00001800),
+ (0x000943e0 + 0x00000000),
+ (0x000943e0 + 0x00000800),
+ (0x000943e0 + 0x00001000),
+ (0x000943e0 + 0x00001800),
+ (0x000d43e0 + 0x00000000),
+ (0x000d43e0 + 0x00000800),
+ (0x000d43e0 + 0x00001000),
+ (0x000d43e0 + 0x00001800),
+ (0x001143e0 + 0x00000000),
+ (0x001143e0 + 0x00000800),
+ (0x001143e0 + 0x00001000),
+ (0x001143e0 + 0x00001800),
+ (0x001543e0 + 0x00000000),
+ (0x001543e0 + 0x00000800),
+ (0x001543e0 + 0x00001000),
+ (0x001543e0 + 0x00001800),
+ (0x001943e0 + 0x00000000),
+ (0x001943e0 + 0x00000800),
+ (0x001943e0 + 0x00001000),
+ (0x001943e0 + 0x00001800),
+ (0x001d43e0 + 0x00000000),
+ (0x001d43e0 + 0x00000800),
+ (0x001d43e0 + 0x00001000),
+ (0x001d43e0 + 0x00001800),
+};
+
+static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 bits, i, tmp, reg;
+
+ /* Devices newer then VEGA10/12 shall have these programming
+ * sequences performed by PSP BL
+ */
+ if (adev->asic_type >= CHIP_VEGA20)
+ return 0;
+
+ bits = 0x7f;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
+ reg = ecc_umc_mcumc_ctrl_addrs[i];
+ tmp = RREG32(reg);
+ tmp &= ~bits;
+ WREG32(reg, tmp);
+ }
+ for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
+ reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
+ tmp = RREG32(reg);
+ tmp &= ~bits;
+ WREG32(reg, tmp);
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
+ reg = ecc_umc_mcumc_ctrl_addrs[i];
+ tmp = RREG32(reg);
+ tmp |= bits;
+ WREG32(reg, tmp);
+ }
+ for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
+ reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
+ tmp = RREG32(reg);
+ tmp |= bits;
+ WREG32(reg, tmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, bits, i, j;
+
+ bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_SOC15_IP(GC, reg);
+
+ tmp &= ~bits;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_SOC15_IP(GC, reg, tmp);
+ }
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_SOC15_IP(GC, reg);
+
+ tmp |= bits;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_SOC15_IP(GC, reg, tmp);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ bool retry_fault = !!(entry->src_data[1] & 0x80);
+ bool write_fault = !!(entry->src_data[1] & 0x20);
+ uint32_t status = 0, cid = 0, rw = 0;
+ struct amdgpu_task_info task_info;
+ struct amdgpu_vmhub *hub;
+ const char *mmhub_cid;
+ const char *hub_name;
+ u64 addr;
+ uint32_t cam_index = 0;
+ int ret, xcc_id = 0;
+ uint32_t node_id;
+
+ node_id = entry->node_id;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0";
+ hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+ } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+ hub_name = "mmhub1";
+ hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0)
+ xcc_id = 0;
+ }
+ hub = &adev->vmhub[xcc_id];
+ }
+
+ if (retry_fault) {
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ cam_index = entry->src_data[2] & 0x3ff;
+
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ if (ret)
+ return 1;
+ } else {
+ /* Process it onyl if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, write_fault))
+ return 1;
+ }
+ }
+
+ if (!printk_ratelimit())
+ return 0;
+
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev,
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
+ hub_name, retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
+ node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
+ node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+ (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ dev_err(adev->dev,
+ "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
+ gfxhub_client_ids[cid],
+ cid);
+ } else {
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ mmhub_cid = mmhub_client_ids_vega10[cid][rw];
+ break;
+ case IP_VERSION(9, 3, 0):
+ mmhub_cid = mmhub_client_ids_vega12[cid][rw];
+ break;
+ case IP_VERSION(9, 4, 0):
+ mmhub_cid = mmhub_client_ids_vega20[cid][rw];
+ break;
+ case IP_VERSION(9, 4, 1):
+ mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ mmhub_cid = mmhub_client_ids_raven[cid][rw];
+ break;
+ case IP_VERSION(1, 5, 0):
+ case IP_VERSION(2, 4, 0):
+ mmhub_cid = mmhub_client_ids_renoir[cid][rw];
+ break;
+ case IP_VERSION(1, 8, 0):
+ case IP_VERSION(9, 4, 2):
+ mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ }
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
+ .set = gmc_v9_0_vm_fault_interrupt_state,
+ .process = gmc_v9_0_process_interrupt,
+};
+
+
+static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
+ .set = gmc_v9_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ }
+}
+
+static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ return false;
+
+ return ((vmhub == AMDGPU_MMHUB0(0) ||
+ vmhub == AMDGPU_MMHUB1(0)) &&
+ (!amdgpu_sriov_vf(adev)) &&
+ (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
+ (adev->apu_flags & AMD_APU_IS_PICASSO))));
+}
+
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+/**
+ * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table using certain type.
+ */
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+ const unsigned int eng = 17;
+ u32 j, inv_req, inv_req2, tmp;
+ struct amdgpu_vmhub *hub;
+
+ BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
+
+ hub = &adev->vmhub[vmhub];
+ if (adev->gmc.xgmi.num_physical_nodes &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
+ /* Vega20+XGMI caches PTEs in TC and TLB. Add a
+ * heavy-weight TLB flush (type 2), which flushes
+ * both. Due to a race condition with concurrent
+ * memory accesses using the same TLB cache line, we
+ * still need a second TLB flush after this.
+ */
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
+ inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ } else if (flush_type == 2 &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
+ adev->rev_id == 0) {
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
+ inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ } else {
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ inv_req2 = 0;
+ }
+
+ /* This is necessary for a HW workaround under SRIOV as well
+ * as GFXOFF under bare metal
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ spin_lock(&adev->gmc.invalidate_lock);
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (j = 0; j < adev->usec_timeout; j++) {
+ /* a read return value of 1 means semaphore acquire */
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ do {
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+
+ /*
+ * Issue a dummy read to wait for the ACK register to
+ * be cleared to avoid a false ACK due to the new fast
+ * GRBM interface.
+ */
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+ RREG32_NO_KIQ(hub->vm_inv_eng0_req +
+ hub->eng_distance * eng);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
+ }
+
+ inv_req = inv_req2;
+ inv_req2 = 0;
+ } while (inv_req);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (j < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
+ /* Vega20+XGMI caches PTEs in TC and TLB. Add a
+ * heavy-weight TLB flush (type 2), which flushes
+ * both. Due to a race condition with concurrent
+ * memory accesses using the same TLB cache line, we
+ * still need a second TLB flush after this.
+ */
+ bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
+ /* 2 dwords flush + 8 dwords fence */
+ unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+ if (vega20_xgmi_wa)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, ndw);
+ if (vega20_xgmi_wa)
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, 2, all_hub);
+
+ if (flush_type == 2 &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
+ adev->rev_id == 0)
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, 0, all_hub);
+
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ up_read(&adev->reset_domain->sem);
+ return -ETIME;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+ up_read(&adev->reset_domain->sem);
+ return -ETIME;
+ }
+ up_read(&adev->reset_domain->sem);
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0), flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
+static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
+ uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
+ unsigned int eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+ unsigned int pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ /* Do nothing because there's no lut register for mmhub1. */
+ if (ring->vm_hub == AMDGPU_MMHUB1(0))
+ return;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format on VEGA 10:
+ * 63:59 reserved
+ * 58:57 mtype
+ * 56 F
+ * 55 L
+ * 54 P
+ * 53 SW
+ * 52 T
+ * 50:48 reserved
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on VEGA 10:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+
+{
+ switch (flags) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_NC:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
+ case AMDGPU_VM_MTYPE_WC:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
+ case AMDGPU_VM_MTYPE_RW:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
+ case AMDGPU_VM_MTYPE_CC:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
+ case AMDGPU_VM_MTYPE_UC:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
+ default:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
+ }
+}
+
+static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE) {
+ *flags &= ~AMDGPU_PDE_PTE;
+ if (!(*flags & AMDGPU_PTE_VALID))
+ *addr |= 1 << PAGE_SHIFT;
+ } else {
+ *flags |= AMDGPU_PTE_TF;
+ }
+ }
+}
+
+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+ bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+ struct amdgpu_vm *vm = mapping->bo_va->base.vm;
+ unsigned int mtype_local, mtype;
+ bool snoop = false;
+ bool is_local;
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ if (is_vram) {
+ if (bo_adev == adev) {
+ if (uncached)
+ mtype = MTYPE_UC;
+ else if (coherent)
+ mtype = MTYPE_CC;
+ else
+ mtype = MTYPE_RW;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) &&
+ adev->gmc.xgmi.connected_to_cpu)
+ snoop = true;
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ if (mapping->bo_va->is_xgmi)
+ snoop = true;
+ }
+ } else {
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ snoop = true;
+ }
+ break;
+ case IP_VERSION(9, 4, 3):
+ /* Only local VRAM BOs or system memory on non-NUMA APUs
+ * can be assumed to be local in their entirety. Choose
+ * MTYPE_NC as safe fallback for all system memory BOs on
+ * NUMA systems. Their MTYPE can be overridden per-page in
+ * gmc_v9_0_override_vm_pte_flags.
+ */
+ mtype_local = MTYPE_RW;
+ if (amdgpu_mtype_local == 1) {
+ DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ mtype_local = MTYPE_NC;
+ } else if (amdgpu_mtype_local == 2) {
+ DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+ mtype_local = MTYPE_CC;
+ } else {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ }
+ is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+ num_possible_nodes() <= 1) ||
+ (is_vram && adev == bo_adev &&
+ KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id);
+ snoop = true;
+ if (uncached) {
+ mtype = MTYPE_UC;
+ } else if (adev->flags & AMD_IS_APU) {
+ mtype = is_local ? mtype_local : MTYPE_NC;
+ } else {
+ /* dGPU */
+ if (is_local)
+ mtype = mtype_local;
+ else if (is_vram)
+ mtype = MTYPE_NC;
+ else
+ mtype = MTYPE_UC;
+ }
+
+ break;
+ default:
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+
+ /* FIXME: is this still needed? Or does
+ * amdgpu_ttm_tt_pde_flags already handle this?
+ */
+ if (!is_vram)
+ snoop = true;
+ }
+
+ if (mtype != MTYPE_NC)
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(mtype);
+ *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+
+static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->tbo.resource)
+ gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
+ mapping, flags);
+}
+
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags)
+{
+ int local_node, nid;
+
+ /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+ * memory can use more efficient MTYPEs.
+ */
+ if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+ return;
+
+ /* Only direct-mapped memory allows us to determine the NUMA node from
+ * the DMA address.
+ */
+ if (!adev->ram_is_direct_mapped) {
+ dev_dbg(adev->dev, "RAM is not direct mapped\n");
+ return;
+ }
+
+ /* Only override mappings with MTYPE_NC, which is the safe default for
+ * cacheable memory.
+ */
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
+ dev_dbg(adev->dev, "MTYPE is not NC\n");
+ return;
+ }
+
+ /* FIXME: Only supported on native mode for now. For carve-out, the
+ * NUMA affinity of the GPU/VM needs to come from the PCI info because
+ * memory partitions are not associated with different NUMA nodes.
+ */
+ if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
+ local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
+ } else {
+ dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+ return;
+ }
+
+ /* Only handle real RAM. Mappings of PCIe resources don't have struct
+ * page or NUMA nodes.
+ */
+ if (!page_is_ram(addr >> PAGE_SHIFT)) {
+ dev_dbg(adev->dev, "Page is not RAM.\n");
+ return;
+ }
+ nid = pfn_to_nid(addr >> PAGE_SHIFT);
+ dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+ vm->mem_id, local_node, nid);
+ if (nid == local_node) {
+ uint64_t old_flags = *flags;
+ unsigned int mtype_local = MTYPE_RW;
+
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+ AMDGPU_PTE_MTYPE_VG10(mtype_local);
+ dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
+ old_flags, *flags);
+ }
+}
+
+static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+ unsigned int size;
+
+ /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport;
+
+ switch (adev->ip_versions[DCE_HWIP][0]) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
+ case IP_VERSION(2, 1, 0):
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
+ default:
+ viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ break;
+ }
+ }
+
+ return size;
+}
+
+static enum amdgpu_memory_partition
+gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+
+ return mode;
+}
+
+static enum amdgpu_memory_partition
+gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_NPS1_PARTITION_MODE;
+
+ return gmc_v9_0_get_memory_partition(adev, NULL);
+}
+
+static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
+ .map_mtype = gmc_v9_0_map_mtype,
+ .get_vm_pde = gmc_v9_0_get_vm_pde,
+ .get_vm_pte = gmc_v9_0_get_vm_pte,
+ .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
+ .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
+ .query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
+};
+
+static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
+}
+
+static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(6, 0, 0):
+ adev->umc.funcs = &umc_v6_0_funcs;
+ break;
+ case IP_VERSION(6, 1, 1):
+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+ adev->umc.retire_unit = 1;
+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+ adev->umc.ras = &umc_v6_1_ras;
+ break;
+ case IP_VERSION(6, 1, 2):
+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+ adev->umc.retire_unit = 1;
+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+ adev->umc.ras = &umc_v6_1_ras;
+ break;
+ case IP_VERSION(6, 7, 0):
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
+ adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+ adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->umc.ras = &umc_v6_7_ras;
+ if (1 & adev->smuio.funcs->get_die_id(adev))
+ adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
+ else
+ adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(9, 4, 1):
+ adev->mmhub.funcs = &mmhub_v9_4_funcs;
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->mmhub.funcs = &mmhub_v1_7_funcs;
+ break;
+ case IP_VERSION(1, 8, 0):
+ adev->mmhub.funcs = &mmhub_v1_8_funcs;
+ break;
+ default:
+ adev->mmhub.funcs = &mmhub_v1_0_funcs;
+ break;
+ }
+}
+
+static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(9, 4, 0):
+ adev->mmhub.ras = &mmhub_v1_0_ras;
+ break;
+ case IP_VERSION(9, 4, 1):
+ adev->mmhub.ras = &mmhub_v9_4_ras;
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->mmhub.ras = &mmhub_v1_7_ras;
+ break;
+ case IP_VERSION(1, 8, 0):
+ adev->mmhub.ras = &mmhub_v1_8_ras;
+ break;
+ default:
+ /* mmhub ras is not available */
+ break;
+ }
+}
+
+static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
+ else
+ adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
+}
+
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->hdp.ras = &hdp_v4_0_ras;
+}
+
+static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+
+ /* is UMC the right IP to check for MCA? Maybe DF? */
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(6, 7, 0):
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ mca->mp0.ras = &mca_v3_0_mp0_ras;
+ mca->mp1.ras = &mca_v3_0_mp1_ras;
+ mca->mpio.ras = &mca_v3_0_mpio_ras;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->gmc.xgmi.ras = &xgmi_ras;
+}
+
+static int gmc_v9_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+ * in their IP discovery tables
+ */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ adev->gmc.xgmi.supported = true;
+
+ if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+ adev->gmc.xgmi.supported = true;
+ adev->gmc.xgmi.connected_to_cpu =
+ adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
+ }
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ enum amdgpu_pkg_type pkg_type =
+ adev->smuio.funcs->get_pkg_type(adev);
+ /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
+ * and the APU, can be in used two possible modes:
+ * - carveout mode
+ * - native APU mode
+ * "is_app_apu" can be used to identify the APU in the native
+ * mode.
+ */
+ adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
+ !pci_resource_len(adev->pdev, 0));
+ }
+
+ gmc_v9_0_set_gmc_funcs(adev);
+ gmc_v9_0_set_irq_funcs(adev);
+ gmc_v9_0_set_umc_funcs(adev);
+ gmc_v9_0_set_mmhub_funcs(adev);
+ gmc_v9_0_set_mmhub_ras_funcs(adev);
+ gmc_v9_0_set_gfxhub_funcs(adev);
+ gmc_v9_0_set_hdp_ras_funcs(adev);
+ gmc_v9_0_set_mca_ras_funcs(adev);
+ gmc_v9_0_set_xgmi_ras_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
+
+ return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
+
+ /*
+ * Workaround performance drop issue with VBIOS enables partial
+ * writes, while disables HBM ECC for vega10.
+ */
+ if (!amdgpu_sriov_vf(adev) &&
+ (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
+ if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
+ if (adev->df.funcs &&
+ adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
+ }
+ }
+
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
+ adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+ adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+
+ if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
+ adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
+ adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ }
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = adev->mmhub.funcs->get_fb_location(adev);
+
+ /* add the xgmi offset of the physical node */
+ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ amdgpu_gmc_sysvm_location(adev, mc);
+ } else {
+ amdgpu_gmc_vram_location(adev, mc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_agp_location(adev, mc);
+ }
+ /* base offset of vram pages */
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+
+ /* XXX: add the xgmi offset of the physical node? */
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+}
+
+/**
+ * gmc_v9_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* size in MB on si */
+ if (!adev->gmc.is_app_apu) {
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ } else {
+ DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+ adev->gmc.mc_vram_size = 0;
+ }
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ !adev->gmc.xgmi.connected_to_cpu) {
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ /*
+ * AMD Accelerated Processing Platform (APP) supporting GPU-HOST xgmi
+ * interface can use VRAM through here as it appears system reserved
+ * memory in host address space.
+ *
+ * For APUs, VRAM is just the stolen system memory and can be accessed
+ * directly.
+ *
+ * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
+ */
+
+ /* check whether both host-gpu and gpu-gpu xgmi links exist */
+ if ((!amdgpu_sriov_vf(adev) &&
+ (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
+ (adev->gmc.xgmi.supported &&
+ adev->gmc.xgmi.connected_to_cpu)) {
+ adev->gmc.aper_base =
+ adev->gfxhub.funcs->get_mc_fb_offset(adev) +
+ adev->gmc.xgmi.physical_node_id *
+ adev->gmc.xgmi.node_segment_size;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+
+#endif
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1): /* all engines support GPUVM */
+ case IP_VERSION(9, 2, 1): /* all engines support GPUVM */
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ default:
+ adev->gmc.gart_size = 512ULL << 20;
+ break;
+ case IP_VERSION(9, 1, 0): /* DCE SG support */
+ case IP_VERSION(9, 2, 2): /* DCE SG support */
+ case IP_VERSION(9, 3, 0):
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
+
+ adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
+
+ gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "VEGA10 PCIE GART already initialized\n");
+ return 0;
+ }
+
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ adev->gmc.vmid0_page_table_depth = 1;
+ adev->gmc.vmid0_page_table_block_size = 12;
+ } else {
+ adev->gmc.vmid0_page_table_depth = 0;
+ adev->gmc.vmid0_page_table_block_size = 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU\n");
+ r = amdgpu_gart_table_ram_alloc(adev);
+ if (r)
+ dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+ } else {
+ r = amdgpu_gart_table_vram_alloc(adev);
+ if (r)
+ return r;
+
+ if (adev->gmc.xgmi.connected_to_cpu)
+ r = amdgpu_gmc_pdb0_alloc(adev);
+ }
+
+ return r;
+}
+
+/**
+ * gmc_v9_0_save_registers - saves regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This saves potential register values that should be
+ * restored upon resume
+ */
+static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
+{
+ if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
+ (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
+ adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
+}
+
+static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ int num_ranges = 0, ret, mem_groups;
+ struct amdgpu_numa_info numa_info;
+ int node_ids[MAX_MEM_RANGES];
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+ mem_groups = hweight32(adev->aid_mask);
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (gmc_v9_0_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+
+ /* If there is only partition, don't use entire size */
+ if (adev->gmc.num_mem_partitions == 1) {
+ mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
+ do_div(mem_ranges[0].size, mem_groups);
+ }
+}
+
+static void
+gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i;
+
+ mode = gmc_v9_0_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ }
+
+ size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[adev->gmc.num_mem_partitions - 1].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
+ << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kzalloc(
+ MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ /* TODO : Get the range from PSP/Discovery for dGPU */
+ if (adev->gmc.is_app_apu)
+ gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = gmc_v9_0_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_WARN(adev->dev,
+ "Mem ranges not matching with hardware config");
+ }
+
+ return 0;
+}
+
+static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
+{
+ static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+ u32 vram_info;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+ adev->gmc.vram_vendor = vram_info & 0xF;
+ }
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+}
+
+static int gmc_v9_0_sw_init(void *handle)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned long inst_mask = adev->aid_mask;
+
+ adev->gfxhub.funcs->init(adev);
+
+ adev->mmhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ gmc_v9_4_3_init_vram_info(adev);
+ } else if (!adev->bios) {
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ adev->gmc.vram_width = 64 * 64;
+ } else {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+ }
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
+
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+ if (adev->df.funcs &&
+ adev->df.funcs->get_hbm_channel_number) {
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
+ }
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+ }
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+
+ if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ } else {
+ /* vm_size is 128TB + 512GB for legacy 3-level page support */
+ amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
+ adev->gmc.translate_further =
+ adev->vm_manager.num_level > 1;
+ }
+ break;
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 2):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size of Vega10,
+ * block size 512 (9bit)
+ */
+ /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
+ else
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
+ case IP_VERSION(9, 4, 1):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask);
+
+ /* Keep the vm size same with Vega20 */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
+ case IP_VERSION(9, 4, 3):
+ bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+ NUM_XCC(adev->gfx.xcc_mask));
+
+ inst_mask <<= AMDGPU_MMHUB0(0);
+ bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32);
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->gmc.xgmi.connected_to_cpu) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /* Set the internal MC address mask
+ * This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44;
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
+ if (r) {
+ dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+ adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
+
+ r = gmc_v9_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ r = gmc_v9_0_init_mem_ranges(adev);
+ if (r)
+ return r;
+ }
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v9_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1..n-1
+ * amdkfd will use VMIDs n..15
+ *
+ * The first KFD VMID is 8 for GPUs with graphics, 3 for
+ * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
+ * for video processing.
+ */
+ adev->vm_manager.first_kfd_vmid =
+ (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? 3 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ gmc_v9_0_save_registers(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ amdgpu_gmc_sysfs_init(adev);
+
+ return 0;
+}
+
+static int gmc_v9_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ amdgpu_gmc_sysfs_fini(adev);
+
+ amdgpu_gmc_ras_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_vm_manager_fini(adev);
+ if (!adev->gmc.real_vram_size) {
+ dev_info(adev->dev, "Put GART in system memory for APU free\n");
+ amdgpu_gart_table_ram_free(adev);
+ } else {
+ amdgpu_gart_table_vram_free(adev);
+ }
+ amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
+ amdgpu_bo_fini(adev);
+
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
+ return 0;
+}
+
+static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
+{
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ if (amdgpu_sriov_vf(adev))
+ break;
+ fallthrough;
+ case IP_VERSION(9, 4, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_mmhub_1_0_0,
+ ARRAY_SIZE(golden_settings_mmhub_1_0_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_athub_1_0_0,
+ ARRAY_SIZE(golden_settings_athub_1_0_0));
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ /* TODO for renoir */
+ soc15_program_register_sequence(adev,
+ golden_settings_athub_1_0_0,
+ ARRAY_SIZE(golden_settings_athub_1_0_0));
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * gmc_v9_0_restore_registers - restores regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This restores register values, saved at suspend.
+ */
+void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
+{
+ if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
+ (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
+ WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
+ WARN_ON(adev->gmc.sdpif_register !=
+ RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
+ }
+}
+
+/**
+ * gmc_v9_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gmc.xgmi.connected_to_cpu)
+ amdgpu_gmc_init_pdb0(adev);
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ if (!adev->in_s0ix) {
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+ }
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ DRM_INFO("PCIE GART of %uM enabled.\n",
+ (unsigned int)(adev->gmc.gart_size >> 20));
+ if (adev->gmc.pdb0_bo)
+ DRM_INFO("PDB0 located at 0x%016llX\n",
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
+ DRM_INFO("PTB located at 0x%016llX\n",
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v9_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+ int i, r;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v9_0_init_golden_registers(adev);
+
+ if (adev->mode_info.num_crtc) {
+ /* Lockout access through VGA aperture*/
+ WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ /* disable VGA render */
+ WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ }
+
+ if (adev->mmhub.funcs->update_power_gating)
+ adev->mmhub.funcs->update_power_gating(adev, true);
+
+ adev->hdp.funcs->init_registers(adev);
+
+ /* After HDP is initialized, flush HDP.*/
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
+ value = false;
+ else
+ value = true;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ }
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0)))
+ continue;
+ gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
+ }
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
+
+ r = gmc_v9_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_emu_mode == 1)
+ return amdgpu_gmc_vram_checking(adev);
+ else
+ return r;
+}
+
+/**
+ * gmc_v9_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
+{
+ if (!adev->in_s0ix)
+ adev->gfxhub.funcs->gart_disable(adev);
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v9_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v9_0_gart_disable(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ /*
+ * Pair the operations did in gmc_v9_0_hw_init and thus maintain
+ * a correct cached state for GMC. Otherwise, the "gate" again
+ * operation on S3 resuming will fail due to wrong cached state.
+ */
+ if (adev->mmhub.funcs->update_power_gating)
+ adev->mmhub.funcs->update_power_gating(adev, false);
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ return 0;
+}
+
+static int gmc_v9_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return gmc_v9_0_hw_fini(adev);
+}
+
+static int gmc_v9_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v9_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v9_0_is_idle(void *handle)
+{
+ /* MC is always ready in GMC v9.*/
+ return true;
+}
+
+static int gmc_v9_0_wait_for_idle(void *handle)
+{
+ /* There is no need to wait for MC idle in GMC v9.*/
+ return 0;
+}
+
+static int gmc_v9_0_soft_reset(void *handle)
+{
+ /* XXX for emulation.*/
+ return 0;
+}
+
+static int gmc_v9_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mmhub.funcs->set_clockgating(adev, state);
+
+ athub_v1_0_set_clockgating(adev, state);
+
+ return 0;
+}
+
+static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags);
+
+ athub_v1_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v9_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
+ .name = "gmc_v9_0",
+ .early_init = gmc_v9_0_early_init,
+ .late_init = gmc_v9_0_late_init,
+ .sw_init = gmc_v9_0_sw_init,
+ .sw_fini = gmc_v9_0_sw_fini,
+ .hw_init = gmc_v9_0_hw_init,
+ .hw_fini = gmc_v9_0_hw_fini,
+ .suspend = gmc_v9_0_suspend,
+ .resume = gmc_v9_0_resume,
+ .is_idle = gmc_v9_0_is_idle,
+ .wait_for_idle = gmc_v9_0_wait_for_idle,
+ .soft_reset = gmc_v9_0_soft_reset,
+ .set_clockgating_state = gmc_v9_0_set_clockgating_state,
+ .set_powergating_state = gmc_v9_0_set_powergating_state,
+ .get_clockgating_state = gmc_v9_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v9_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 9,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v9_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
new file mode 100644
index 000000000..c415c439f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V9_0_H__
+#define __GMC_V9_0_H__
+
+extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
+
+void gmc_v9_0_restore_registers(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
new file mode 100644
index 000000000..71d1a2e3b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "hdp_v4_0.h"
+#include "amdgpu_ras.h"
+
+#include "hdp/hdp_4_0_offset.h"
+#include "hdp/hdp_4_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/* for Vega20 register name change */
+#define mmHDP_MEM_POWER_CTRL 0x00d4
+#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK 0x00000001L
+#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK 0x00000002L
+#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L
+#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
+#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
+
+static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+}
+
+static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0) ||
+ adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 2))
+ return;
+
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+}
+
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+ err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+};
+
+static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ if (adev->ip_versions[HDP_HWIP][0] >= IP_VERSION(4, 4, 0))
+ WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+ else
+ /*read back hdp ras counter to reset it to 0 */
+ RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+}
+
+static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 0) ||
+ adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 1) ||
+ adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 1) ||
+ adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 0)) {
+ def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+ else
+ data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data);
+ } else {
+ def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL));
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK |
+ HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK |
+ HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK |
+ HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK;
+ else
+ data &= ~(HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK |
+ HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK |
+ HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK |
+ HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK);
+
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL), data);
+ }
+}
+
+static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_HDP_LS */
+ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
+ if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+}
+
+static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[HDP_HWIP][0]) {
+ case IP_VERSION(4, 2, 1):
+ WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+
+ if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0))
+ WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
+
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
+}
+
+struct amdgpu_ras_block_hw_ops hdp_v4_0_ras_hw_ops = {
+ .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+ .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+};
+
+struct amdgpu_hdp_ras hdp_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &hdp_v4_0_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
+ .flush_hdp = hdp_v4_0_flush_hdp,
+ .invalidate_hdp = hdp_v4_0_invalidate_hdp,
+ .update_clock_gating = hdp_v4_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v4_0_get_clockgating_state,
+ .init_registers = hdp_v4_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
new file mode 100644
index 000000000..c44eee928
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V4_0_H__
+#define __HDP_V4_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern struct amdgpu_hdp_ras hdp_v4_0_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
new file mode 100644
index 000000000..a9ea23fa0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "hdp_v5_0.h"
+
+#include "hdp/hdp_5_0_0_offset.h"
+#include "hdp/hdp_5_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+}
+
+static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
+}
+
+static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl, hdp_clk_cntl1;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch,
+ * forced on IPH & RC clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ IPH_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* HDP 5.0 doesn't support dynamic power mode switch,
+ * disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, 1);
+ /* RC should not use shut down mode, fallback to ds or ls if allowed */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ IPH_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_0_update_mem_power_gating(adev, enable);
+ hdp_v5_0_update_medium_grain_clock_gating(adev, enable);
+}
+
+static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
+ tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
+ WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
+}
+
+const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
+ .flush_hdp = hdp_v5_0_flush_hdp,
+ .invalidate_hdp = hdp_v5_0_invalidate_hdp,
+ .update_clock_gating = hdp_v5_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_0_get_clockgating_state,
+ .init_registers = hdp_v5_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h
new file mode 100644
index 000000000..2d5ec2b41
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V5_0_H__
+#define __HDP_V5_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v5_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
new file mode 100644
index 000000000..29c3484ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "hdp_v5_2.h"
+
+#include "hdp/hdp_5_2_1_offset.h"
+#include "hdp/hdp_5_2_1_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+ else
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+}
+
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch, forced on MEM clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable MEM clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
+const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
+ .flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
new file mode 100644
index 000000000..cb2abc0c8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V5_2_H__
+#define __HDP_V5_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v5_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
new file mode 100644
index 000000000..063eba619
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "hdp_v6_0.h"
+
+#include "hdp/hdp_6_0_0_offset.h"
+#include "hdp/hdp_6_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+}
+
+static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl, hdp_clk_cntl1;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch,
+ * forced on IPH & RC clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
+ .flush_hdp = hdp_v6_0_flush_hdp,
+ .update_clock_gating = hdp_v6_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v6_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
new file mode 100644
index 000000000..533ecd8c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V6_0_H__
+#define __HDP_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
new file mode 100644
index 000000000..aecad530b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "vid.h"
+
+#include "oss/oss_2_4_d.h"
+#include "oss/oss_2_4_sh_mask.h"
+
+#include "bif/bif_5_1_d.h"
+#include "bif/bif_5_1_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
+
+static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * iceland_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (VI).
+ */
+static void iceland_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * iceland_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (VI).
+ */
+static void iceland_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 0);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ WREG32(mmIH_CNTL, ih_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * iceland_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int iceland_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int rb_bufsz;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+
+ /* disable irqs */
+ iceland_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+ ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ /* Default settings for IH_CNTL (disabled at first) */
+ ih_cntl = RREG32(mmIH_CNTL);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, MC_VMID, 0);
+
+ if (adev->irq.msi_enabled)
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, RPTR_REARM, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ iceland_ih_enable_interrupts(adev);
+
+ return 0;
+}
+
+/**
+ * iceland_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VI).
+ */
+static void iceland_ih_irq_disable(struct amdgpu_device *adev)
+{
+ iceland_ih_disable_interrupts(adev);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * iceland_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VI). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by cz_irq_process(VI).
+ * Returns the value of the wptr.
+ */
+static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32(mmIH_RB_WPTR);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 16). Hopefully
+ * this should allow us to catchup.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * iceland_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to decode
+ * @entry: IV entry to place decoded information into
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position.
+ */
+static void iceland_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * iceland_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void iceland_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+}
+
+static int iceland_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ iceland_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int iceland_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int iceland_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int iceland_ih_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return iceland_ih_irq_init(adev);
+}
+
+static int iceland_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ iceland_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int iceland_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return iceland_ih_hw_fini(adev);
+}
+
+static int iceland_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return iceland_ih_hw_init(adev);
+}
+
+static bool iceland_ih_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return false;
+
+ return true;
+}
+
+static int iceland_ih_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (!REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int iceland_ih_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_IH, 1);
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int iceland_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int iceland_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs iceland_ih_ip_funcs = {
+ .name = "iceland_ih",
+ .early_init = iceland_ih_early_init,
+ .late_init = NULL,
+ .sw_init = iceland_ih_sw_init,
+ .sw_fini = iceland_ih_sw_fini,
+ .hw_init = iceland_ih_hw_init,
+ .hw_fini = iceland_ih_hw_fini,
+ .suspend = iceland_ih_suspend,
+ .resume = iceland_ih_resume,
+ .is_idle = iceland_ih_is_idle,
+ .wait_for_idle = iceland_ih_wait_for_idle,
+ .soft_reset = iceland_ih_soft_reset,
+ .set_clockgating_state = iceland_ih_set_clockgating_state,
+ .set_powergating_state = iceland_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs iceland_ih_funcs = {
+ .get_wptr = iceland_ih_get_wptr,
+ .decode_iv = iceland_ih_decode_iv,
+ .set_rptr = iceland_ih_set_rptr
+};
+
+static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &iceland_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version iceland_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 2,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &iceland_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.h b/drivers/gpu/drm/amd/amdgpu/iceland_ih.h
new file mode 100644
index 000000000..3235f4277
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ICELAND_IH_H__
+#define __ICELAND_IH_H__
+
+extern const struct amdgpu_ip_block_version iceland_ih_ip_block;
+
+#endif /* __ICELAND_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
new file mode 100644
index 000000000..ee6a041cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
@@ -0,0 +1,2172 @@
+/*
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ICELAND_SDMA_PKT_OPEN_H_
+#define __ICELAND_SDMA_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_GEN_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+
+/*define for op field*/
+#define SDMA_PKT_HEADER_op_offset 0
+#define SDMA_PKT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_op_shift 0
+#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_HEADER_sub_op_offset 0
+#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_sub_op_shift 8
+#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst2_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_shift 14
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for dst1_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_shift 22
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_mask) << SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_3_height_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_height_shift 16
+#define SDMA_PKT_COPY_TILED_DW_3_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_height_mask) << SDMA_PKT_COPY_TILED_DW_3_height_shift)
+
+/*define for DW_4 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_slice_pitch_mask) << SDMA_PKT_COPY_TILED_DW_4_slice_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00000FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 11
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_shift)
+
+/*define for DW_6 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIT_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_shift 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_BANK_W(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_BANK_H(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_shift 21
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_NUM_BANK(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00000FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_ha field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_shift 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_HA(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_pitch_in_tile field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_mask) << SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_pitch_in_tile field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_shift)
+
+/*define for DW_11 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_shift)
+
+/*define for DW_5 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for struct_ha field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_shift 22
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_HA(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for linear_ha field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_shift 30
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_HA(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_PITCH_IN_TILE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_mask) << SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_height_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_height_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_3_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_height_mask) << SDMA_PKT_WRITE_TILED_DW_3_height_shift)
+
+/*define for DW_4 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_SLICE_PITCH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_mask) << SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00000FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0000FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+#endif /* __ICELAND_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
new file mode 100644
index 000000000..ec0c8f8b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_0_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V6_0).
+ */
+static void ih_v6_0_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_0)
+ */
+static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V6_0).
+ */
+static int ih_v6_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v6_0_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v6_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v6_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v6_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_0)
+ */
+static int ih_v6_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_0_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it.
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v6_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v6_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by deafult. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v6_0_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_0_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_0_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ */
+static void ih_v6_0_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v6_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void ih_v6_0_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v6_0_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v6_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v6_0_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_0_self_irq_funcs = {
+ .process = ih_v6_0_self_irq,
+};
+
+static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs;
+}
+
+static int ih_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_0_set_interrupt_funcs(adev);
+ ih_v6_0_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v6_0_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr =
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ adev->irq.ih1.ring_size = 0;
+ adev->irq.ih2.ring_size = 0;
+
+ /* initialize ih control register offset */
+ ih_v6_0_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v6_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v6_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = ih_v6_0_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v6_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_0_irq_disable(adev);
+
+ return 0;
+}
+
+static int ih_v6_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ih_v6_0_hw_fini(adev);
+}
+
+static int ih_v6_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ih_v6_0_hw_init(adev);
+}
+
+static bool ih_v6_0_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v6_0_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_0_soft_reset(void *handle)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int ih_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_0_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v6_0_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
+ .name = "ih_v6_0",
+ .early_init = ih_v6_0_early_init,
+ .late_init = NULL,
+ .sw_init = ih_v6_0_sw_init,
+ .sw_fini = ih_v6_0_sw_fini,
+ .hw_init = ih_v6_0_hw_init,
+ .hw_fini = ih_v6_0_hw_fini,
+ .suspend = ih_v6_0_suspend,
+ .resume = ih_v6_0_resume,
+ .is_idle = ih_v6_0_is_idle,
+ .wait_for_idle = ih_v6_0_wait_for_idle,
+ .soft_reset = ih_v6_0_soft_reset,
+ .set_clockgating_state = ih_v6_0_set_clockgating_state,
+ .set_powergating_state = ih_v6_0_set_powergating_state,
+ .get_clockgating_state = ih_v6_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
+ .get_wptr = ih_v6_0_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v6_0_set_rptr
+};
+
+static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v6_0_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
new file mode 100644
index 000000000..f27b55580
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_0_IH_H__
+#define __IH_V6_0_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
new file mode 100644
index 000000000..8fb05eae3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_6_1_0_offset.h"
+#include "oss/osssys_6_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v6_1.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v6_1_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V6_0).
+ */
+static void ih_v6_1_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v6_1_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V6_0)
+ */
+static int ih_v6_1_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V6_0).
+ */
+static int ih_v6_1_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v6_1_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v6_1_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v6_1_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v6_1_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V6_0)
+ */
+static int ih_v6_1_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v6_1_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v6_1_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it.
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v6_1_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v6_1_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v6_1_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by deafult. The delay is
+ * used to avoid ISR being called too frequently
+ * when page fault happens on several continuous page
+ * and thus avoid MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v6_1_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v6_1_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v6_1_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v6_1_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v6_1_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v6_1_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ */
+static void ih_v6_1_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v6_1_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void ih_v6_1_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v6_1_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v6_1_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v6_1_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v6_1_self_irq_funcs = {
+ .process = ih_v6_1_self_irq,
+};
+
+static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs;
+}
+
+static int ih_v6_1_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_1_set_interrupt_funcs(adev);
+ ih_v6_1_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v6_1_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr =
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ adev->irq.ih1.ring_size = 0;
+ adev->irq.ih2.ring_size = 0;
+
+ /* initialize ih control register offset */
+ ih_v6_1_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v6_1_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v6_1_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = ih_v6_1_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v6_1_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_1_irq_disable(adev);
+
+ return 0;
+}
+
+static int ih_v6_1_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ih_v6_1_hw_fini(adev);
+}
+
+static int ih_v6_1_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ih_v6_1_hw_init(adev);
+}
+
+static bool ih_v6_1_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v6_1_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v6_1_soft_reset(void *handle)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int ih_v6_1_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ ih_v6_1_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v6_1_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v6_1_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
+ .name = "ih_v6_1",
+ .early_init = ih_v6_1_early_init,
+ .late_init = NULL,
+ .sw_init = ih_v6_1_sw_init,
+ .sw_fini = ih_v6_1_sw_fini,
+ .hw_init = ih_v6_1_hw_init,
+ .hw_fini = ih_v6_1_hw_fini,
+ .suspend = ih_v6_1_suspend,
+ .resume = ih_v6_1_resume,
+ .is_idle = ih_v6_1_is_idle,
+ .wait_for_idle = ih_v6_1_wait_for_idle,
+ .soft_reset = ih_v6_1_soft_reset,
+ .set_clockgating_state = ih_v6_1_set_clockgating_state,
+ .set_powergating_state = ih_v6_1_set_powergating_state,
+ .get_clockgating_state = ih_v6_1_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
+ .get_wptr = ih_v6_1_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v6_1_set_rptr
+};
+
+static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v6_1_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v6_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
new file mode 100644
index 000000000..2232bc5cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V6_1_IH_H__
+#define __IH_V6_1_IH_H__
+
+extern const struct amdgpu_ip_block_version ih_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
new file mode 100644
index 000000000..4ab90c785
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
+
+static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+ char fw_name[40];
+ char ucode_prefix[30];
+ int err;
+ const struct imu_firmware_header_v1_0 *imu_hdr;
+ struct amdgpu_firmware_info *info = NULL;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_imu.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, fw_name);
+ if (err)
+ goto out;
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+ //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+ info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+ info->fw = adev->gfx.imu_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx11: Failed to load firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->gfx.imu_fw);
+ }
+
+ return err;
+}
+
+static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct imu_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.imu_fw)
+ return -EINVAL;
+
+ hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ //amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
+ fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+ return 0;
+}
+
+static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+ int i, imu_reg_val = 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+ if ((imu_reg_val & 0x1f) == 0x1f)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "init imu: IMU start timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void imu_v11_0_setup(struct amdgpu_device *adev)
+{
+ int imu_reg_val;
+
+ //enable IMU debug mode
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+ if (adev->gfx.imu.mode == DEBUG_MODE) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+ imu_reg_val |= 0x1;
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
+ }
+
+ //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+ imu_reg_val |= 0x10007;
+ WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
+}
+
+static int imu_v11_0_start(struct amdgpu_device *adev)
+{
+ int imu_reg_val;
+
+ //Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+ imu_reg_val &= 0xfffffffe;
+ WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+ return imu_v11_0_wait_for_reset_status(adev);
+}
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS , 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS , 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10201000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000080, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000003f7, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x0fffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000545, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x13455431, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76027602, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x76207620, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000345, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x0000003e, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] =
+{
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ef, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0x00000f01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x000000e4, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x01231023, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000243, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000001ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000061ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00002825, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000501, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
+};
+
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+ const struct imu_rlc_ram_golden *regs,
+ const u32 array_size)
+{
+ const struct imu_rlc_ram_golden *entry;
+ u32 reg, data;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ reg |= entry->addr_mask;
+
+ data = entry->data;
+ if (entry->reg == regGCMC_VM_AGP_BASE)
+ data = 0x00ffffff;
+ else if (entry->reg == regGCMC_VM_AGP_TOP)
+ data = 0x0;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+ data = adev->gmc.vram_start >> 24;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+ data = adev->gmc.vram_end >> 24;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+ //Indicate the latest entry
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+ u32 reg_data;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ program_imu_rlc_ram(adev, imu_rlc_ram_golden_11,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11));
+ break;
+ case IP_VERSION(11, 0, 2):
+ program_imu_rlc_ram(adev, imu_rlc_ram_golden_11_0_2,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_2));
+ break;
+ case IP_VERSION(11, 0, 3):
+ imu_v11_0_3_program_rlc_ram(adev);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ //Indicate the contents of the RAM are valid
+ reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+ reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
+}
+
+const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
+ .init_microcode = imu_v11_0_init_microcode,
+ .load_microcode = imu_v11_0_load_microcode,
+ .setup_imu = imu_v11_0_setup,
+ .start_imu = imu_v11_0_start,
+ .program_rlc_ram = imu_v11_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v11_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
new file mode 100644
index 000000000..e71f96fc2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_H__
+#define __IMU_V11_0_H__
+
+extern const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs;
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
new file mode 100644
index 000000000..fc69c1a29
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "imu_v11_0_3.h"
+
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_3[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_RD_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_IO_WR_COMBINE_FLUSH, 0x00055555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_DRAM_COMBINE_FLUSH, 0x00555555, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC2, 0x00001ffe, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_CREDITS, 0x003f3fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_TAG_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE0, 0x00041000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCC_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE0, 0x00040000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_VCD_RESERVE1, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_MISC, 0x00000017, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGUS_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_CREDITS, 0x003f3fbf, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE0, 0x10200800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_TAG_RESERVE1, 0x00000088, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE0, 0x1d041040, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_VCC_RESERVE1, 0x80000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_IO_PRIORITY, 0x88888888, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MAM_CTRL, 0x0000d800, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ARB_FINAL, 0x000007ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_DRAM_PAGE_BURST, 0x20080200, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_SDP_ENABLE, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0x000fffff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCEA_MISC, 0x0c48bff0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SA_UNIT_DISABLE, 0x00fffc01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_PRIM_CONFIG, 0x000fffe1, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_RB_BACKEND_DISABLE, 0xffffff01, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x40000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xfffe0001, 0x42000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x44000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x46000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x48000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG, 0xffff0001, 0x4A000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCGTS_TCC_DISABLE, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_SHADER_RATE_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCC_GC_EDC_CONFIG, 0x00000001, 0x00000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x000005ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x000065ff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x00000fff, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT0_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000001, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CONTEXT1_PER_PFVF_PTE_CACHE_FRAGMENT_SIZES, 0x00000c00, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000444, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x54105410, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x76323276, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x00000244, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCUTCL2_HARVEST_BYPASS_GROUPS, 0x00000006, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0000000c, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x00020000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_UCODE_SELFLOAD_CONTROL, 0x00000210, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPC_PSP_DEBUG, CPC_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0xe0000000),
+};
+
+static void program_rlc_ram_register_setting(struct amdgpu_device *adev,
+ const struct imu_rlc_ram_golden *regs,
+ const u32 array_size)
+{
+ const struct imu_rlc_ram_golden *entry;
+ u32 reg, data;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ reg |= entry->addr_mask;
+
+ data = entry->data;
+ if (entry->reg == regGCMC_VM_AGP_BASE)
+ data = 0x00ffffff;
+ else if (entry->reg == regGCMC_VM_AGP_TOP)
+ data = 0x0;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+ data = adev->gmc.vram_start >> 24;
+ else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+ data = adev->gmc.vram_end >> 24;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+ }
+ //Indicate the latest entry
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+}
+
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev)
+{
+ program_rlc_ram_register_setting(adev,
+ imu_rlc_ram_golden_11_0_3,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11_0_3));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
new file mode 100644
index 000000000..702be568f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V11_0_3_H__
+#define __IMU_V11_0_3_H__
+
+void imu_v11_0_3_program_rlc_ram(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
new file mode 100644
index 000000000..77595e962
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v1_0.h"
+#include "jpeg_v1_0.h"
+
+#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+
+static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[(*ptr)++] = 0;
+ ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+ } else {
+ ring->ring[(*ptr)++] = reg_offset;
+ ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+ }
+ ring->ring[(*ptr)++] = val;
+}
+
+static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ uint32_t reg, reg_offset, val, mask, i;
+
+ // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+ reg_offset = (reg << 2);
+ val = lower_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+ reg_offset = (reg << 2);
+ val = upper_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 3rd to 5th: issue MEM_READ commands
+ for (i = 0; i <= 2; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x13;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 7th: program mmUVD_JRBC_RB_REF_DATA
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ mask = 0x1;
+
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = 0x01400200;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = val;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[ptr++] = 0;
+ ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+ } else {
+ ring->ring[ptr++] = reg_offset;
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+ }
+ ring->ring[ptr++] = mask;
+
+ //9th to 21st: insert no-op
+ for (i = 0; i <= 12; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ring->ring[ptr++] = 0;
+ }
+
+ //22nd: reset mmUVD_JRBC_RB_RPTR
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+ reg_offset = (reg << 2);
+ val = 0;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x12;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0xffffffff);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ /* emit trap */
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG decode TRAP\n");
+
+ switch (entry->src_id) {
+ case 126:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+int jpeg_v1_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v1_0_set_dec_ring_funcs(adev);
+ jpeg_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+int jpeg_v1_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] =
+ SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG free up sw allocation
+ */
+void jpeg_v1_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
+}
+
+/**
+ * jpeg_v1_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ * @mode: SPG or DPG mode
+ *
+ * Setup and start the JPEG block
+ */
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+
+ if (mode == 0) {
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ }
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ jpeg_v1_0_decode_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+}
+
+static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .nop = PACKET0(0x81ff, 0),
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .extra_dw = 64,
+ .get_rptr = jpeg_v1_0_decode_ring_get_rptr,
+ .get_wptr = jpeg_v1_0_decode_ring_get_wptr,
+ .set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
+ 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
+ .emit_ib = jpeg_v1_0_decode_ring_emit_ib,
+ .emit_fence = jpeg_v1_0_decode_ring_emit_fence,
+ .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v1_0_decode_ring_nop,
+ .insert_start = jpeg_v1_0_decode_ring_insert_start,
+ .insert_end = jpeg_v1_0_decode_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = jpeg_v1_0_ring_begin_use,
+ .end_use = vcn_v1_0_ring_end_use,
+ .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
+ .set = jpeg_v1_0_set_interrupt_state,
+ .process = jpeg_v1_0_process_interrupt,
+};
+
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
+}
+
+static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ int cnt = 0;
+
+ mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+ if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
+ DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
+
+ for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
+ DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
+ }
+
+ vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
new file mode 100644
index 000000000..bbf33a6a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V1_0_H__
+#define __JPEG_V1_0_H__
+
+int jpeg_v1_0_early_init(void *handle);
+int jpeg_v1_0_sw_init(void *handle);
+void jpeg_v1_0_sw_fini(void *handle);
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+
+#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
new file mode 100644
index 000000000..1c8116d75
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -0,0 +1,816 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v2_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v2_0_set_dec_ring_funcs(adev);
+ jpeg_v2_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (!r)
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+ jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_0_hw_init(adev);
+
+ return r;
+}
+
+static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int r = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ return 0;
+}
+
+static int jpeg_v2_0_enable_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data;
+ int r = 0;
+
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS));
+ data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ data |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v2_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v2_0_disable_clock_gating(adev);
+
+ WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable JPEG CGC */
+ jpeg_v2_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v2_0_enable_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ if (!jpeg_v2_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v2_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_0_stop(adev);
+ else
+ ret = jpeg_v2_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
+ .name = "jpeg_v2_0",
+ .early_init = jpeg_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_0_sw_init,
+ .sw_fini = jpeg_v2_0_sw_fini,
+ .hw_init = jpeg_v2_0_hw_init,
+ .hw_fini = jpeg_v2_0_hw_fini,
+ .suspend = jpeg_v2_0_suspend,
+ .resume = jpeg_v2_0_resume,
+ .is_idle = jpeg_v2_0_is_idle,
+ .wait_for_idle = jpeg_v2_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v2_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
+ .set = jpeg_v2_0_set_interrupt_state,
+ .process = jpeg_v2_0_process_interrupt,
+};
+
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
new file mode 100644
index 000000000..654e43e83
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_0_H__
+#define __JPEG_V2_0_H__
+
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr);
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+
+extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
+
+#endif /* __JPEG_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
new file mode 100644
index 000000000..aadb74de5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v2_5.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
+
+static int amdgpu_ih_clientid_jpeg[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * jpeg_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_5_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 harvest;
+ int i;
+
+ adev->jpeg.num_jpeg_rings = 1;
+ adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->jpeg.harvest_config |= 1 << i;
+ }
+ if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 |
+ AMDGPU_JPEG_HARVEST_JPEG1))
+ return -ENOENT;
+
+ jpeg_v2_5_set_dec_ring_funcs(adev);
+ jpeg_v2_5_set_irq_funcs(adev);
+ jpeg_v2_5_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_5_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+ sprintf(ring->name, "jpeg_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ }
+
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_5_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_5_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_5_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
+ jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_5_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_5_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_5_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_5_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_5_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v2_6_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v2_6_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x6aa04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80000000 | (1 << (ring->me * 2 + 14)));
+}
+
+/**
+ * jpeg_v2_6_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x6aa04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14)));
+}
+
+static bool jpeg_v2_5_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+ }
+
+ return ret;
+}
+
+static int jpeg_v2_5_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ret = SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_5_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (!jpeg_v2_5_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_5_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v2_5_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if(state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_5_stop(adev);
+ else
+ ret = jpeg_v2_5_start(adev);
+
+ if(!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
+ .name = "jpeg_v2_5",
+ .early_init = jpeg_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_5_sw_init,
+ .sw_fini = jpeg_v2_5_sw_fini,
+ .hw_init = jpeg_v2_5_hw_init,
+ .hw_fini = jpeg_v2_5_hw_fini,
+ .suspend = jpeg_v2_5_suspend,
+ .resume = jpeg_v2_5_resume,
+ .is_idle = jpeg_v2_5_is_idle,
+ .wait_for_idle = jpeg_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_5_set_powergating_state,
+};
+
+static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
+ .name = "jpeg_v2_6",
+ .early_init = jpeg_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_5_sw_init,
+ .sw_fini = jpeg_v2_5_sw_fini,
+ .hw_init = jpeg_v2_5_hw_init,
+ .hw_fini = jpeg_v2_5_hw_fini,
+ .suspend = jpeg_v2_5_suspend,
+ .resume = jpeg_v2_5_resume,
+ .is_idle = jpeg_v2_5_is_idle,
+ .wait_for_idle = jpeg_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_5_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_6_dec_ring_insert_start,
+ .insert_end = jpeg_v2_6_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+ if (adev->asic_type == CHIP_ARCTURUS)
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ else /* CHIP_ALDEBARAN */
+ adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec->me = i;
+ DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
+ .set = jpeg_v2_5_set_interrupt_state,
+ .process = jpeg_v2_5_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_6_ras_irq_funcs = {
+ .set = jpeg_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].irq.num_types = 1;
+ adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+
+ adev->jpeg.inst[i].ras_poison_irq.num_types = 1;
+ adev->jpeg.inst[i].ras_poison_irq.funcs = &jpeg_v2_6_ras_irq_funcs;
+ }
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &jpeg_v2_5_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version jpeg_v2_6_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 6,
+ .rev = 0,
+ .funcs = &jpeg_v2_6_ip_funcs,
+};
+
+static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V2_6_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V2_6_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = {
+ .query_poison_status = jpeg_v2_6_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[JPEG_HWIP][0]) {
+ case IP_VERSION(2, 6, 0):
+ adev->jpeg.ras = &jpeg_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
new file mode 100644
index 000000000..1e858c6cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_5_H__
+#define __JPEG_V2_5_H__
+
+enum amdgpu_jpeg_v2_6_sub_block {
+ AMDGPU_JPEG_V2_6_JPEG0 = 0,
+ AMDGPU_JPEG_V2_6_JPEG1,
+
+ AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
+extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block;
+
+#endif /* __JPEG_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
new file mode 100644
index 000000000..df4440c21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_3_0_0_offset.h"
+#include "vcn/vcn_3_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v3_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ u32 harvest;
+
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ break;
+ default:
+ harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ return -ENOENT;
+ break;
+ }
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v3_0_set_dec_ring_funcs(adev);
+ jpeg_v3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v3_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v3_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v3_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v3_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+ jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v3_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v3_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v3_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v3_0_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v3_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v3_0_disable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v3_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, 0, mmJPEG_ENC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v3_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v3_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v3_0_enable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v3_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v3_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 1;
+
+ ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+
+ return ret;
+}
+
+static int jpeg_v3_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+
+ if (enable) {
+ if (!jpeg_v3_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v3_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v3_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if(state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v3_0_stop(adev);
+ else
+ ret = jpeg_v3_0_start(adev);
+
+ if(!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
+ .name = "jpeg_v3_0",
+ .early_init = jpeg_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v3_0_sw_init,
+ .sw_fini = jpeg_v3_0_sw_fini,
+ .hw_init = jpeg_v3_0_hw_init,
+ .hw_fini = jpeg_v3_0_hw_fini,
+ .suspend = jpeg_v3_0_suspend,
+ .resume = jpeg_v3_0_resume,
+ .is_idle = jpeg_v3_0_is_idle,
+ .wait_for_idle = jpeg_v3_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v3_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v3_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v3_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v3_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v3_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v3_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v3_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
+ .set = jpeg_v3_0_set_interrupt_state,
+ .process = jpeg_v3_0_process_interrupt,
+};
+
+static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v3_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v3_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h
new file mode 100644
index 000000000..ce775a0c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V3_0_H__
+#define __JPEG_V3_0_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v3_0_ip_block;
+
+#endif /* __JPEG_V3_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
new file mode 100644
index 000000000..3eb3dcd56
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -0,0 +1,841 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * jpeg_v4_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v4_0_set_dec_ring_funcs(adev);
+ jpeg_v4_0_set_irq_funcs(adev);
+ jpeg_v4_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1);
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v4_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_start_sriov(adev);
+ if (r)
+ return r;
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ } else {
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+ jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v4_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v4_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ regUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v4_0_disable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ header.version = MMSCH_VERSION;
+ header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE);
+
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = 0;
+ header.jpegdec.table_size = 0;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ ring = adev->jpeg.inst->ring_dec;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+ regUVD_JRBC_RB_SIZE), ring->ring_size / 4);
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.jpegdec.init_status = 0;
+ header.jpegdec.table_offset = header.total_size;
+ header.jpegdec.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->jpegdec.init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+
+ return 0;
+
+}
+
+/**
+ * jpeg_v4_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v4_0_enable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v4_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v4_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v4_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 1;
+
+ ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+
+ return ret;
+}
+
+static int jpeg_v4_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v4_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+
+ if (enable) {
+ if (!jpeg_v4_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v4_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v4_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v4_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_stop(adev);
+ else
+ ret = jpeg_v4_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
+ .name = "jpeg_v4_0",
+ .early_init = jpeg_v4_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v4_0_sw_init,
+ .sw_fini = jpeg_v4_0_sw_fini,
+ .hw_init = jpeg_v4_0_hw_init,
+ .hw_fini = jpeg_v4_0_hw_fini,
+ .suspend = jpeg_v4_0_suspend,
+ .resume = jpeg_v4_0_resume,
+ .is_idle = jpeg_v4_0_is_idle,
+ .wait_for_idle = jpeg_v4_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v4_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v4_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs;
+ DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
+ .set = jpeg_v4_0_set_interrupt_state,
+ .process = jpeg_v4_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_ras_irq_funcs = {
+ .set = jpeg_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_ras_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v4_0_ip_funcs,
+};
+
+static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V4_0_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V4_0_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v4_0_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = {
+ .query_poison_status = jpeg_v4_0_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
+ },
+};
+
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[JPEG_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ adev->jpeg.ras = &jpeg_v4_0_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
new file mode 100644
index 000000000..07d36c2ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_H__
+#define __JPEG_V4_0_H__
+
+enum amdgpu_jpeg_v4_0_sub_block {
+ AMDGPU_JPEG_V4_0_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_JPEG1,
+
+ AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
+
+#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
new file mode 100644
index 000000000..1de79d660
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -0,0 +1,1218 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+enum jpeg_engin_status {
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_4_0__SRCID__JPEG_DECODE,
+ VCN_4_0__SRCID__JPEG1_DECODE,
+ VCN_4_0__SRCID__JPEG2_DECODE,
+ VCN_4_0__SRCID__JPEG3_DECODE,
+ VCN_4_0__SRCID__JPEG4_DECODE,
+ VCN_4_0__SRCID__JPEG5_DECODE,
+ VCN_4_0__SRCID__JPEG6_DECODE,
+ VCN_4_0__SRCID__JPEG7_DECODE
+};
+
+/**
+ * jpeg_v4_0_3_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v4_0_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+
+ jpeg_v4_0_3_set_dec_ring_funcs(adev);
+ jpeg_v4_0_3_set_irq_funcs(adev);
+ jpeg_v4_0_3_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v4_0_3_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 9 * jpeg_inst;
+ } else {
+ if (j < 4)
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 4 + j + 32 * jpeg_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 8 + j + 32 * jpeg_inst;
+ }
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(
+ JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+ (j ? (0x40 * j - 0xc80) : 0));
+ }
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ r = amdgpu_jpeg_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v4_0_3_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+ MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ if (j <= 3) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 4].table_offset = item_offset;
+ header.mjpegdec1[j - 4].init_status = 0;
+ header.mjpegdec1[j - 4].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V4_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status =
+ ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v4_0_3_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v4_0_3_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v4_0_3_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ } else {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ ring = adev->jpeg.inst[i].ring_dec;
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(
+ VCN, GET_INST(VCN, i),
+ regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index
+ << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+ }
+ DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v4_0_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
+
+ return ret;
+}
+
+/**
+ * jpeg_v4_0_3_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v4_0_3_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v4_0_3_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v4_0_3_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v4_0_3_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v4_0_3_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ int i, jpeg_inst;
+ uint32_t data;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
+ } else {
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ }
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
+ for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v4_0_3_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(
+ JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON
+ << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_3_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << j,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(
+ JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
+{
+ int i, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(
+ JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_OFF
+ << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ (ring->pipe ? (0x40 * ring->pipe - 0xc80) :
+ 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00004000);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->adev->jpeg.inst[ring->me].aid_id) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x4);
+ } else {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ if (ring->adev->jpeg.inst[ring->me].aid_id) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x0);
+ } else {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v4_0_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool ret = false;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ret &= ((RREG32_SOC15_OFFSET(
+ JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS,
+ reg_offset) &
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
+
+ ret &= SOC15_WAIT_ON_RREG_OFFSET(
+ JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (enable) {
+ if (!jpeg_v4_0_3_is_idle(handle))
+ return -EBUSY;
+ jpeg_v4_0_3_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v4_0_3_disable_clock_gating(adev, i);
+ }
+ }
+ return 0;
+}
+
+static int jpeg_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_stop(adev);
+ else
+ ret = jpeg_v4_0_3_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_4_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_4_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_4_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_4_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_4_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_4_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_4_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
+ .name = "jpeg_v4_0_3",
+ .early_init = jpeg_v4_0_3_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v4_0_3_sw_init,
+ .sw_fini = jpeg_v4_0_3_sw_fini,
+ .hw_init = jpeg_v4_0_3_hw_init,
+ .hw_fini = jpeg_v4_0_3_hw_fini,
+ .suspend = jpeg_v4_0_3_suspend,
+ .resume = jpeg_v4_0_3_resume,
+ .is_idle = jpeg_v4_0_3_is_idle,
+ .wait_for_idle = jpeg_v4_0_3_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+ DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
+ .set = jpeg_v4_0_3_set_interrupt_state,
+ .process = jpeg_v4_0_3_process_interrupt,
+};
+
+static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+ }
+ adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &jpeg_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
+ {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
+};
+
+static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* jpeg v4_0_3 only support uncorrectable errors */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, jpeg_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t jpeg_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ jpeg_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
+ GET_INST(VCN, jpeg_inst));
+}
+
+static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+ dev_warn(adev->dev, "JPEG RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
+ jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ },
+};
+
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v4_0_3_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
new file mode 100644
index 000000000..22483dc66
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_3_H__
+#define __JPEG_V4_0_3_H__
+
+#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x404d
+#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x404e
+#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x404f
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ab
+#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ac
+#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40a4
+#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40a6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40b6
+#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40b7
+#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x42d4
+#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x42d5
+#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
+
+#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
new file mode 100644
index 000000000..1a285b531
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v6_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_6_0_0_offset.h"
+#include "lsdma/lsdma_6_0_0_sh_mask.h"
+
+static int lsdma_v6_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS),
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK,
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK);
+}
+
+static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+ return ret;
+}
+
+static int lsdma_v6_0_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t size)
+{
+ int ret;
+ uint32_t tmp;
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp);
+
+ ret = lsdma_v6_0_wait_pio_status(adev);
+ if (ret)
+ dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+ return ret;
+}
+
+static void lsdma_v6_0_update_memory_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+
+ tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+ WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp);
+}
+
+const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = {
+ .copy_mem = lsdma_v6_0_copy_mem,
+ .fill_mem = lsdma_v6_0_fill_mem,
+ .update_memory_power_gating = lsdma_v6_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
new file mode 100644
index 000000000..3ef79be1a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V6_0_H__
+#define __LSDMA_V6_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs;
+
+#endif /* __LSDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
new file mode 100644
index 000000000..6dae4a2e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+#include "amdgpu_mca.h"
+
+#define smnMCMP0_STATUST0 0x03830408
+#define smnMCMP1_STATUST0 0x03b30408
+#define smnMCMPIO_STATUST0 0x0c930408
+
+
+static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMP0_STATUST0,
+ ras_error_status);
+}
+
+static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
+{
+ if (!block_obj)
+ return -EINVAL;
+
+ if ((block_obj->ras_comm.block == block) &&
+ (block_obj->ras_comm.sub_block_index == sub_block_index)) {
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp0_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
+
+static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMP1_STATUST0,
+ ras_error_status);
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mp1_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
+
+static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_mca_query_ras_error_count(adev,
+ smnMCMPIO_STATUST0,
+ ras_error_status);
+}
+
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
+ .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
+ .query_ras_error_address = NULL,
+};
+
+struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
+ .ras_block = {
+ .hw_ops = &mca_v3_0_mpio_hw_ops,
+ .ras_block_match = mca_v3_0_ras_block_match,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
new file mode 100644
index 000000000..d3eaef0d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __MCA_V3_0_H__
+#define __MCA_V3_0_H__
+
+extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
new file mode 100644
index 000000000..eb06d7498
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -0,0 +1,1187 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "nv.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "gc/gc_10_1_0_default.h"
+#include "v10_structs.h"
+#include "mes_api_def.h"
+
+#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
+#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
+
+MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
+
+static int mes_v10_1_hw_fini(void *handle);
+static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+
+static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG();
+ }
+}
+
+static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v10_1_ring_get_rptr,
+ .get_wptr = mes_v10_1_ring_get_wptr,
+ .set_wptr = mes_v10_1_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+ void *pkt, int size,
+ int api_status_off)
+{
+ int ndw = size / 4;
+ signed long r;
+ union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ struct MES_API_STATUS *api_status;
+ struct amdgpu_device *adev = mes->adev;
+ struct amdgpu_ring *ring = &mes->ring;
+ unsigned long flags;
+
+ BUG_ON(size % 4 != 0);
+
+ spin_lock_irqsave(&mes->ring_lock, flags);
+ if (amdgpu_ring_alloc(ring, ndw)) {
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
+ api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
+
+ amdgpu_ring_write_multiple(ring, pkt, ndw);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+ DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
+ adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("MES failed to response msg=%d\n",
+ x_pkt->header.opcode);
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ return MES_QUEUE_TYPE_GFX;
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ return MES_QUEUE_TYPE_COMPUTE;
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ return MES_QUEUE_TYPE_SDMA;
+ else
+ BUG();
+ return -1;
+}
+
+static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input)
+{
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.process_id = input->process_id;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+ mes_add_queue_pkt.process_va_start = input->process_va_start;
+ mes_add_queue_pkt.process_va_end = input->process_va_end;
+ mes_add_queue_pkt.process_quantum = input->process_quantum;
+ mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+ mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+ mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+ mes_add_queue_pkt.inprocess_gang_priority =
+ input->inprocess_gang_priority;
+ mes_add_queue_pkt.gang_global_priority_level =
+ input->gang_global_priority_level;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ if (input->queue_type == AMDGPU_RING_TYPE_GFX)
+ mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
+ else
+ mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ }
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ return 0;
+}
+
+static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ return 0;
+}
+
+static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);
+
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
+}
+
+static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
+ .add_hw_queue = mes_v10_1_add_hw_queue,
+ .remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
+ .suspend_gang = mes_v10_1_suspend_gang,
+ .resume_gang = mes_v10_1_resume_gang,
+};
+
+static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+}
+
+static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq &&
+ pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ nv_grbm_select(adev, 3, pipe, 0, 0);
+ WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
+ (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
+ BYPASS_UNCACHED, 0);
+ WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
+
+ /* unhalt MES and activate pipe0 */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
+ adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+ udelay(100);
+ } else {
+ data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
+ adev->enable_mes_kiq ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+ }
+}
+
+/* This function is for backdoor MES firmware */
+static int mes_v10_1_load_microcode(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ uint32_t data;
+
+ mes_v10_1_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v10_1_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ mes_v10_1_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ nv_grbm_select(adev, 3, pipe, 0, 0);
+
+ /* set ucode start address */
+ WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
+ (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
+
+ /* set ucode fimrware address */
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
+
+ /* invalidate ICACHE */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ break;
+ }
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ break;
+ }
+
+ /* prime the ICACHE. */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ break;
+ }
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ break;
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ memset(mqd, 0, sizeof(*mqd));
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = mmCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = mmCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = mmCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell? */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+ else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+ mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT;
+ mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT;
+
+ tmp = mmCP_HQD_GFX_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1);
+ /* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
+ mqd->cp_hqd_suspend_cntl_stack_offset = tmp;
+
+ amdgpu_device_flush_hdp(ring->adev, NULL);
+ return 0;
+}
+
+#if 0
+static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+#endif
+
+static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+
+ return amdgpu_ring_test_helper(kiq_ring);
+}
+
+static int mes_v10_1_queue_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = mes_v10_1_mqd_init(&adev->mes.ring);
+ if (r)
+ return r;
+
+ r = mes_v10_1_kiq_enable_queue(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int mes_v10_1_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ ring = &adev->mes.ring;
+
+ ring->funcs = &mes_v10_1_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = 0;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v10_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring = &adev->mes.ring;
+ else
+ BUG();
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe]) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int mes_v10_1_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe, r;
+
+ adev->mes.funcs = &mes_v10_1_funcs;
+ adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;
+
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ r = mes_v10_1_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v10_1_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ if (adev->enable_mes_kiq) {
+ r = mes_v10_1_kiq_ring_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = mes_v10_1_ring_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int mes_v10_1_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe;
+
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ }
+
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
+ &adev->mes.ring.mqd_gpu_addr,
+ &adev->mes.ring.mqd_ptr);
+
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ amdgpu_ring_fini(&adev->mes.ring);
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ break;
+ default:
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ break;
+ }
+}
+
+static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+ }
+
+ mes_v10_1_enable(adev, true);
+
+ mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ r = mes_v10_1_queue_init(adev);
+ if (r)
+ goto failure;
+
+ return r;
+
+failure:
+ mes_v10_1_hw_fini(adev);
+ return r;
+}
+
+static int mes_v10_1_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v10_1_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+ }
+
+ mes_v10_1_enable(adev, true);
+ }
+
+ r = mes_v10_1_queue_init(adev);
+ if (r)
+ goto failure;
+
+ r = mes_v10_1_set_hw_resources(&adev->mes);
+ if (r)
+ goto failure;
+
+ mes_v10_1_init_aggregated_doorbell(&adev->mes);
+
+ r = mes_v10_1_query_sched_status(&adev->mes);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ adev->mes.ring.sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v10_1_hw_fini(adev);
+ return r;
+}
+
+static int mes_v10_1_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mes.ring.sched.ready = false;
+
+ mes_v10_1_enable(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ return 0;
+}
+
+static int mes_v10_1_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_mes_suspend(adev);
+ if (r)
+ return r;
+
+ return mes_v10_1_hw_fini(adev);
+}
+
+static int mes_v10_1_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = mes_v10_1_hw_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_mes_resume(adev);
+}
+
+static int mes_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe, r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+ r = amdgpu_mes_init_microcode(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_v10_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_in_reset(adev))
+ amdgpu_mes_self_test(adev);
+
+ return 0;
+}
+
+static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
+ .name = "mes_v10_1",
+ .early_init = mes_v10_0_early_init,
+ .late_init = mes_v10_0_late_init,
+ .sw_init = mes_v10_1_sw_init,
+ .sw_fini = mes_v10_1_sw_fini,
+ .hw_init = mes_v10_1_hw_init,
+ .hw_fini = mes_v10_1_hw_fini,
+ .suspend = mes_v10_1_suspend,
+ .resume = mes_v10_1_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 10,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &mes_v10_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
new file mode 100644
index 000000000..9afd6ddb0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V10_1_H__
+#define __MES_V10_1_H__
+
+extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
new file mode 100644
index 000000000..6827d5470
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -0,0 +1,1340 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v11_structs.h"
+#include "mes_v11_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
+
+static int mes_v11_0_hw_fini(void *handle);
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+
+static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG();
+ }
+}
+
+static u64 mes_v11_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static u64 mes_v11_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v11_0_ring_get_rptr,
+ .get_wptr = mes_v11_0_ring_get_wptr,
+ .set_wptr = mes_v11_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+ void *pkt, int size,
+ int api_status_off)
+{
+ int ndw = size / 4;
+ signed long r;
+ union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ struct MES_API_STATUS *api_status;
+ struct amdgpu_device *adev = mes->adev;
+ struct amdgpu_ring *ring = &mes->ring;
+ unsigned long flags;
+ signed long timeout = adev->usec_timeout;
+
+ if (amdgpu_emu_mode) {
+ timeout *= 100;
+ } else if (amdgpu_sriov_vf(adev)) {
+ /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+ timeout = 15 * 600 * 1000;
+ }
+ BUG_ON(size % 4 != 0);
+
+ spin_lock_irqsave(&mes->ring_lock, flags);
+ if (amdgpu_ring_alloc(ring, ndw)) {
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
+ api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
+
+ amdgpu_ring_write_multiple(ring, pkt, ndw);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+ DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
+ timeout);
+ if (r < 1) {
+ DRM_ERROR("MES failed to response msg=%d\n",
+ x_pkt->header.opcode);
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ return MES_QUEUE_TYPE_GFX;
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ return MES_QUEUE_TYPE_COMPUTE;
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ return MES_QUEUE_TYPE_SDMA;
+ else
+ BUG();
+ return -1;
+}
+
+static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input)
+{
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.process_id = input->process_id;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+ mes_add_queue_pkt.process_va_start = input->process_va_start;
+ mes_add_queue_pkt.process_va_end = input->process_va_end;
+ mes_add_queue_pkt.process_quantum = input->process_quantum;
+ mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+ mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+ mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+ mes_add_queue_pkt.inprocess_gang_priority =
+ input->inprocess_gang_priority;
+ mes_add_queue_pkt.gang_global_priority_level =
+ input->gang_global_priority_level;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 2)
+ mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+ else
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+ mes_add_queue_pkt.tma_addr = input->tma_addr;
+ mes_add_queue_pkt.trap_en = input->trap_en;
+ mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+ mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+ /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+ mes_add_queue_pkt.gds_size = input->queue_size;
+
+ mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ mes_remove_queue_pkt.unmap_legacy_queue = 1;
+ mes_remove_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ return 0;
+}
+
+static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ return 0;
+}
+
+static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input)
+{
+ union MESAPI__MISC misc_pkt;
+
+ memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+ misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ misc_pkt.header.opcode = MES_SCH_API_MISC;
+ misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ switch (input->op) {
+ case MES_MISC_OP_READ_REG:
+ misc_pkt.opcode = MESAPI_MISC__READ_REG;
+ misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+ misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+ break;
+ case MES_MISC_OP_WRITE_REG:
+ misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+ misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+ misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+ break;
+ case MES_MISC_OP_WRM_REG_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = 0;
+ break;
+ case MES_MISC_OP_WRM_REG_WR_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+ break;
+ case MES_MISC_OP_SET_SHADER_DEBUGGER:
+ misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+ misc_pkt.set_shader_debugger.process_context_addr =
+ input->set_shader_debugger.process_context_addr;
+ misc_pkt.set_shader_debugger.flags.u32all =
+ input->set_shader_debugger.flags.u32all;
+ misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+ input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+ memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+ input->set_shader_debugger.tcp_watch_cntl,
+ sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+ misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
+ break;
+ default:
+ DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ return -EINVAL;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &misc_pkt, sizeof(misc_pkt),
+ offsetof(union MESAPI__MISC, api_status));
+}
+
+static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
+
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
+}
+
+static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
+ .add_hw_queue = mes_v11_0_add_hw_queue,
+ .remove_hw_queue = mes_v11_0_remove_hw_queue,
+ .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
+ .suspend_gang = mes_v11_0_suspend_gang,
+ .resume_gang = mes_v11_0_resume_gang,
+ .misc_op = mes_v11_0_misc_op,
+};
+
+static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+}
+
+static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq &&
+ pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* unhalt MES and activate pipe0 */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
+ adev->enable_mes_kiq ? 1 : 0);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else
+ udelay(50);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
+ adev->enable_mes_kiq ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+/* This function is for backdoor MES firmware */
+static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+ uint64_t ucode_addr;
+
+ mes_v11_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v11_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ mes_v11_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* set ucode fimrware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(eop, 0,
+ adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ memset(mqd, 0, sizeof(*mqd));
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ amdgpu_device_flush_hdp(ring->adev, NULL);
+ return 0;
+}
+
+static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+
+ return amdgpu_ring_test_helper(kiq_ring);
+}
+
+static int mes_v11_0_queue_init(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ if (pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring = &adev->mes.ring;
+ else
+ BUG();
+
+ if ((pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v11_0_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ r = mes_v11_0_kiq_enable_queue(adev);
+ if (r)
+ return r;
+ } else {
+ mes_v11_0_queue_init_register(ring);
+ }
+
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v11_0_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ ring = &adev->mes.ring;
+
+ ring->funcs = &mes_v11_0_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = 0;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
+ enum admgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v11_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring = &adev->mes.ring;
+ else
+ BUG();
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe]) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int mes_v11_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe, r;
+
+ adev->mes.funcs = &mes_v11_0_funcs;
+ adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+
+ r = mes_v11_0_allocate_eop_buf(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v11_0_mqd_sw_init(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ if (adev->enable_mes_kiq) {
+ r = mes_v11_0_kiq_ring_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = mes_v11_0_ring_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int mes_v11_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe;
+
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ }
+
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
+ &adev->mes.ring.mqd_gpu_addr,
+ &adev->mes.ring.mqd_ptr);
+
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ amdgpu_ring_fini(&adev->mes.ring);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+static void mes_v11_0_kiq_dequeue(struct amdgpu_ring *ring)
+{
+ uint32_t data;
+ int i;
+ struct amdgpu_device *adev = ring->adev;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+}
+
+static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* tell RLC which is KIQ dequeue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= ~RLC_CP_SCHEDULERS__scheduler0_MASK;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+}
+
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ }
+
+ mes_v11_0_enable(adev, true);
+
+ mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ return r;
+
+failure:
+ mes_v11_0_hw_fini(adev);
+ return r;
+}
+
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+ if (adev->mes.ring.sched.ready) {
+ mes_v11_0_kiq_dequeue(&adev->mes.ring);
+ adev->mes.ring.sched.ready = false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring);
+ mes_v11_0_kiq_clear(adev);
+ }
+
+ mes_v11_0_enable(adev, false);
+
+ return 0;
+}
+
+static int mes_v11_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v11_0_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+ }
+
+ mes_v11_0_enable(adev, true);
+ }
+
+ r = mes_v11_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+ if (r)
+ goto failure;
+
+ r = mes_v11_0_set_hw_resources(&adev->mes);
+ if (r)
+ goto failure;
+
+ mes_v11_0_init_aggregated_doorbell(&adev->mes);
+
+ r = mes_v11_0_query_sched_status(&adev->mes);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ adev->mes.ring.sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v11_0_hw_fini(adev);
+ return r;
+}
+
+static int mes_v11_0_hw_fini(void *handle)
+{
+ return 0;
+}
+
+static int mes_v11_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_mes_suspend(adev);
+ if (r)
+ return r;
+
+ return mes_v11_0_hw_fini(adev);
+}
+
+static int mes_v11_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = mes_v11_0_hw_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_mes_resume(adev);
+}
+
+static int mes_v11_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int pipe, r;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
+ continue;
+ r = amdgpu_mes_init_microcode(adev, pipe);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_v11_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* it's only intended for use in mes_self_test case, not for s0ix and reset */
+ if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend &&
+ (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
+ amdgpu_mes_self_test(adev);
+
+ return 0;
+}
+
+static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
+ .name = "mes_v11_0",
+ .early_init = mes_v11_0_early_init,
+ .late_init = mes_v11_0_late_init,
+ .sw_init = mes_v11_0_sw_init,
+ .sw_fini = mes_v11_0_sw_fini,
+ .hw_init = mes_v11_0_hw_init,
+ .hw_fini = mes_v11_0_hw_fini,
+ .suspend = mes_v11_0_suspend,
+ .resume = mes_v11_0_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
new file mode 100644
index 000000000..b3519e1df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V11_0_H__
+#define __MES_V11_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v11_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
new file mode 100644
index 000000000..fb91b3105
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "mmhub_v1_0.h"
+
+#include "mmhub/mmhub_1_0_offset.h"
+#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "mmhub/mmhub_1_0_default.h"
+#include "vega10_enum.h"
+#include "soc15.h"
+#include "soc15_common.h"
+
+#define mmDAGB0_CNTL_MISC2_RV 0x008f
+#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
+
+static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ /*
+ * Raven2 has a HW issue that it is unable to use the vram which
+ * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
+ * workaround that increase system aperture high address (add 1)
+ * to get rid of the VM fault and hardware hang.
+ */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+
+ tmp = mmVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
+
+ tmp = mmVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp);
+}
+
+static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned num_level, block_size;
+ uint32_t tmp;
+ int i;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
+ amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GMC,
+ enable);
+}
+
+static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ mmhub_v1_0_init_gart_aperture_regs(adev);
+ mmhub_v1_0_init_system_aperture_regs(adev);
+ mmhub_v1_0_init_tlb_regs(adev);
+ mmhub_v1_0_init_cache_regs(adev);
+
+ mmhub_v1_0_enable_system_domain(adev);
+ mmhub_v1_0_disable_identity_aperture(adev);
+ mmhub_v1_0_setup_vmid_config(adev);
+ mmhub_v1_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < AMDGPU_NUM_VMID; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+ }
+}
+
+/**
+ * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static void mmhub_v1_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+}
+
+static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
+
+ if (adev->asic_type != CHIP_RAVEN) {
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2);
+ } else
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data |= ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ if (adev->asic_type != CHIP_RAVEN)
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ } else {
+ data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ if (adev->asic_type != CHIP_RAVEN)
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
+
+ if (def1 != data1) {
+ if (adev->asic_type != CHIP_RAVEN)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+ else
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1);
+ }
+
+ if (adev->asic_type != CHIP_RAVEN && def2 != data2)
+ WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);
+}
+
+static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ mmhub_v1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if ((data & ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = {
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0},
+};
+
+static int mmhub_v1_0_get_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) {
+ if (mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v1_0_ras_fields[i].sec_count_mask) >>
+ mmhub_v1_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev,
+ "MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v1_0_ras_fields[i].ded_count_mask) >>
+ mmhub_v1_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev,
+ "MMHUB SubBlock %s, DED %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v1_0_get_ras_error_count(adev,
+ &mmhub_v1_0_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* read back edc counter registers to reset the counters to 0 */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+ }
+}
+
+struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_0_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_0_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
+ .get_fb_location = mmhub_v1_0_get_fb_location,
+ .init = mmhub_v1_0_init,
+ .gart_enable = mmhub_v1_0_gart_enable,
+ .set_fault_enable_default = mmhub_v1_0_set_fault_enable_default,
+ .gart_disable = mmhub_v1_0_gart_disable,
+ .set_clockgating = mmhub_v1_0_set_clockgating,
+ .get_clockgating = mmhub_v1_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v1_0_setup_vm_pt_regs,
+ .update_power_gating = mmhub_v1_0_update_power_gating,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
new file mode 100644
index 000000000..dae7ca48b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_0_H__
+#define __MMHUB_V1_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_0_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
new file mode 100644
index 000000000..9086f2fdf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "mmhub_v1_7.h"
+
+#include "mmhub/mmhub_1_7_offset.h"
+#include "mmhub/mmhub_1_7_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base));
+}
+
+static void mmhub_v1_7_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_7_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+}
+
+static void mmhub_v1_7_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL4, tmp);
+}
+
+static void mmhub_v1_7_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned num_level, block_size;
+ uint32_t tmp;
+ int i;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v1_7_init_gart_aperture_regs(adev);
+ mmhub_v1_7_init_system_aperture_regs(adev);
+ mmhub_v1_7_init_tlb_regs(adev);
+ mmhub_v1_7_init_cache_regs(adev);
+
+ mmhub_v1_7_enable_system_domain(adev);
+ mmhub_v1_7_disable_identity_aperture(adev);
+ mmhub_v1_7_setup_vmid_config(adev);
+ mmhub_v1_7_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, 0);
+ }
+}
+
+/**
+ * mmhub_v1_7_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static void mmhub_v1_7_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+}
+
+static void mmhub_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+
+ if (enable) {
+ data |= ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ } else {
+ data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG, data);
+
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+}
+
+static void mmhub_v1_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ if (enable)
+ data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Change state only if MCCG support is enabled through driver */
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v1_7_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+
+ /* Change state only if LS support is enabled through driver */
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v1_7_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if ((data & ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct soc15_ras_field_entry mmhub_v1_7_ras_fields[] = {
+ /* MMHUB Range 0 */
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 1 */
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHAB Range 2*/
+ { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Rang 3 */
+ { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 4 */
+ { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+
+ /* MMHUAB Range 5 */
+ { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+};
+
+static const struct soc15_reg_entry mmhub_v1_7_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_EDC_CNT3), 0, 0, 0 },
+};
+
+static int mmhub_v1_7_get_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t value,
+ uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ras_fields); i++) {
+ if(mmhub_v1_7_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v1_7_ras_fields[i].sec_count_mask) >>
+ mmhub_v1_7_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v1_7_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v1_7_ras_fields[i].ded_count_mask) >>
+ mmhub_v1_7_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, DED %d\n",
+ mmhub_v1_7_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v1_7_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v1_7_get_ras_error_count(adev, &mmhub_v1_7_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* write 0 to reset the edc counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_edc_cnt_regs); i++)
+ WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_edc_cnt_regs[i]), 0);
+ }
+}
+
+static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA3_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA4_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, regMMEA5_ERR_STATUS), 0, 0, 0 },
+};
+
+static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
+ i, reg_value);
+ }
+ }
+}
+
+static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ mmhub_v1_7_ea_err_status_regs[i]));
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x01);
+ WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
+ reg_value);
+ }
+}
+
+struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_7_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
+ .get_fb_location = mmhub_v1_7_get_fb_location,
+ .init = mmhub_v1_7_init,
+ .gart_enable = mmhub_v1_7_gart_enable,
+ .set_fault_enable_default = mmhub_v1_7_set_fault_enable_default,
+ .gart_disable = mmhub_v1_7_gart_disable,
+ .set_clockgating = mmhub_v1_7_set_clockgating,
+ .get_clockgating = mmhub_v1_7_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v1_7_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
new file mode 100644
index 000000000..629f49052
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_7_H__
+#define __MMHUB_V1_7_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_7_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
new file mode 100644
index 000000000..3d8e579d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "mmhub_v1_8.h"
+
+#include "mmhub/mmhub_1_8_0_offset.h"
+#include "mmhub/mmhub_1_8_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+
+#define regVM_L2_CNTL3_DEFAULT 0x80100007
+#define regVM_L2_CNTL4_DEFAULT 0x000000c1
+
+static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP);
+
+ base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base;
+ u32 inst_mask;
+ int i;
+
+ if (adev->gmc.pdb0_bo)
+ pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+ else
+ pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v1_8_setup_vm_pt_regs(adev, 0, pt_base);
+
+ /* If use GART for FB translation, vmid0 page table covers both
+ * vram and system memory (gart)
+ */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.fb_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.fb_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ } else {
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+ }
+ }
+}
+
+static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ uint64_t value;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT,
+ adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
+ adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* In the case squeezing vram into GART aperture, we don't use
+ * FB aperture and AGP aperture. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, 0xFFFFFF);
+ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_TOP, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_BASE,
+ 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ 0x3FFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+}
+
+static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ /* Setup TLB control */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION,
+ 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL2, tmp);
+
+ tmp = regVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL3, tmp);
+
+ tmp = regVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
+ }
+}
+
+static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp,
+ VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(MMHUB, i,
+ regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+ }
+}
+
+static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp, inst_mask;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH, num_level);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* On 9.4.3, XNACK can be enabled in the SQ
+ * per-process. Retry faults need to be enabled for
+ * that to work.
+ */
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1);
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+ }
+}
+
+static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 i, j, inst_mask;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, j,
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+ }
+}
+
+static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v1_8_init_gart_aperture_regs(adev);
+ mmhub_v1_8_init_system_aperture_regs(adev);
+ mmhub_v1_8_init_tlb_regs(adev);
+ mmhub_v1_8_init_cache_regs(adev);
+
+ mmhub_v1_8_enable_system_domain(adev);
+ mmhub_v1_8_disable_identity_aperture(adev);
+ mmhub_v1_8_setup_vmid_config(adev);
+ mmhub_v1_8_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j, inst_mask;
+
+ /* Disable all tables */
+ inst_mask = adev->aid_mask;
+ for_each_inst(j, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE,
+ 0);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
+ }
+ }
+}
+
+/**
+ * mmhub_v1_8_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp, inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp);
+ }
+}
+
+static void mmhub_v1_8_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+
+ hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, i, regVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, i,
+ regVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regVM_INVALIDATE_ENG1_REQ -
+ regVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static int mmhub_v1_8_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
+ .get_fb_location = mmhub_v1_8_get_fb_location,
+ .init = mmhub_v1_8_init,
+ .gart_enable = mmhub_v1_8_gart_enable,
+ .set_fault_enable_default = mmhub_v1_8_set_fault_enable_default,
+ .gart_disable = mmhub_v1_8_gart_disable,
+ .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs,
+ .set_clockgating = mmhub_v1_8_set_clockgating,
+ .get_clockgating = mmhub_v1_8_get_clockgating,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+ {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+ {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+ {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+ {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+ {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+ {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+ {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+ {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+ {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+ {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+ {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+ {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+ {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &err_data->ce_count);
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_inst);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
+ regMMEA0_ERR_STATUS,
+ regMMEA1_ERR_STATUS,
+ regMMEA2_ERR_STATUS,
+ regMMEA3_ERR_STATUS,
+ regMMEA4_ERR_STATUS,
+};
+
+static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t reg_value;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t i;
+
+ /* query mmea ras err status */
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
+ i, mmhub_inst, reg_value);
+ }
+ }
+
+ /* query mm_cane ras err status */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
+ mmhub_inst, reg_value);
+ }
+}
+
+static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_err_status(adev, i);
+}
+
+static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t mmea_cgtt_clk_cntl_addr_dist;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t reg_value;
+ uint32_t i;
+
+ /* reset mmea ras err status */
+ mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ /* force clk branch on for response path
+ * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
+ */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+
+ /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist,
+ reg_value);
+
+ /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 0);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+ }
+
+ /* reset mm_cane ras err status
+ * force clk branch on for response path
+ * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
+ */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+
+ /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
+
+ /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 0);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+}
+
+static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_err_status(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_8_ras_hw_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
new file mode 100644
index 000000000..126f0075a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V1_8_H__
+#define __MMHUB_V1_8_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v1_8_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
new file mode 100644
index 000000000..8f76c6ecf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -0,0 +1,718 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v2_0.h"
+
+#include "mmhub/mmhub_2_0_0_offset.h"
+#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "mmhub/mmhub_2_0_0_default.h"
+#include "navi10_enum.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "soc15_common.h"
+
+#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
+#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
+
+static const char *mmhub_client_ids_navi1x[][2] = {
+ [3][0] = "DCEDMC",
+ [4][0] = "DCEVGA",
+ [5][0] = "MP0",
+ [6][0] = "MP1",
+ [13][0] = "VMC",
+ [14][0] = "HDP",
+ [15][0] = "OSS",
+ [16][0] = "VCNU",
+ [17][0] = "JPEG",
+ [18][0] = "VCN",
+ [3][1] = "DCEDMC",
+ [4][1] = "DCEXFC",
+ [5][1] = "DCEVGA",
+ [6][1] = "DCEDWB",
+ [7][1] = "MP0",
+ [8][1] = "MP1",
+ [9][1] = "DBGU1",
+ [10][1] = "DBGU0",
+ [11][1] = "XDP",
+ [14][1] = "HDP",
+ [15][1] = "OSS",
+ [16][1] = "VCNU",
+ [17][1] = "JPEG",
+ [18][1] = "VCN",
+};
+
+static const char *mmhub_client_ids_sienna_cichlid[][2] = {
+ [3][0] = "DCEDMC",
+ [4][0] = "DCEVGA",
+ [5][0] = "MP0",
+ [6][0] = "MP1",
+ [8][0] = "VMC",
+ [9][0] = "VCNU0",
+ [10][0] = "JPEG",
+ [12][0] = "VCNU1",
+ [13][0] = "VCN1",
+ [14][0] = "HDP",
+ [15][0] = "OSS",
+ [32+11][0] = "VCN0",
+ [0][1] = "DBGU0",
+ [1][1] = "DBGU1",
+ [2][1] = "DCEDWB",
+ [3][1] = "DCEDMC",
+ [4][1] = "DCEVGA",
+ [5][1] = "MP0",
+ [6][1] = "MP1",
+ [7][1] = "XDP",
+ [9][1] = "VCNU0",
+ [10][1] = "JPEG",
+ [11][1] = "VCN0",
+ [12][1] = "VCNU1",
+ [13][1] = "VCN1",
+ [14][1] = "HDP",
+ [15][1] = "OSS",
+};
+
+static const char *mmhub_client_ids_beige_goby[][2] = {
+ [3][0] = "DCEDMC",
+ [4][0] = "DCEVGA",
+ [5][0] = "MP0",
+ [6][0] = "MP1",
+ [8][0] = "VMC",
+ [9][0] = "VCNU0",
+ [11][0] = "VCN0",
+ [14][0] = "HDP",
+ [15][0] = "OSS",
+ [0][1] = "DBGU0",
+ [1][1] = "DBGU1",
+ [2][1] = "DCEDWB",
+ [3][1] = "DCEDMC",
+ [4][1] = "DCEVGA",
+ [5][1] = "MP0",
+ [6][1] = "MP1",
+ [7][1] = "XDP",
+ [9][1] = "VCNU0",
+ [11][1] = "VCN0",
+ [14][1] = "HDP",
+ [15][1] = "OSS",
+};
+
+static uint32_t mmhub_v2_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ mmhub_cid = mmhub_client_ids_navi1x[cid][rw];
+ break;
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw];
+ break;
+ case IP_VERSION(2, 1, 2):
+ mmhub_cid = mmhub_client_ids_beige_goby[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ }
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
+
+ tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
+
+ tmp = mmMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp);
+
+ tmp = mmMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v2_0_init_gart_aperture_regs(adev);
+ mmhub_v2_0_init_system_aperture_regs(adev);
+ mmhub_v2_0_init_tlb_regs(adev);
+ mmhub_v2_0_init_cache_regs(adev);
+
+ mmhub_v2_0_enable_system_domain(adev);
+ mmhub_v2_0_disable_identity_aperture(adev);
+ mmhub_v2_0_setup_vmid_config(adev);
+ mmhub_v2_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < AMDGPU_NUM_VMID; i++)
+ WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v2_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v2_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v2_0_get_invalidate_req,
+};
+
+static void mmhub_v2_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ -
+ mmMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v2_0_vmhub_funcs;
+}
+
+static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
+ break;
+ default:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ break;
+ }
+
+ if (enable) {
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ } else {
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
+ break;
+ default:
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+ break;
+ }
+}
+
+static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ return;
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ /* There is no ATCL2 in MMHUB for 2.1.x */
+ return;
+ default:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ break;
+ }
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ mmhub_v2_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v2_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ /* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+ * based on DAGB
+ */
+ data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ break;
+ }
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v2_0_funcs = {
+ .init = mmhub_v2_0_init,
+ .gart_enable = mmhub_v2_0_gart_enable,
+ .set_fault_enable_default = mmhub_v2_0_set_fault_enable_default,
+ .gart_disable = mmhub_v2_0_gart_disable,
+ .set_clockgating = mmhub_v2_0_set_clockgating,
+ .get_clockgating = mmhub_v2_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v2_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
new file mode 100644
index 000000000..f80f461d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V2_0_H__
+#define __MMHUB_V2_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v2_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
new file mode 100644
index 000000000..1dce053a4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -0,0 +1,629 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v2_3.h"
+
+#include "mmhub/mmhub_2_3_0_offset.h"
+#include "mmhub/mmhub_2_3_0_sh_mask.h"
+#include "mmhub/mmhub_2_3_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+static const char *mmhub_client_ids_vangogh[][2] = {
+ [0][0] = "MP0",
+ [1][0] = "MP1",
+ [2][0] = "DCEDMC",
+ [3][0] = "DCEVGA",
+ [13][0] = "UTCL2",
+ [26][0] = "OSS",
+ [27][0] = "HDP",
+ [28][0] = "VCN",
+ [29][0] = "VCNU",
+ [30][0] = "JPEG",
+ [0][1] = "MP0",
+ [1][1] = "MP1",
+ [2][1] = "DCEDMC",
+ [3][1] = "DCEVGA",
+ [4][1] = "DCEDWB",
+ [5][1] = "XDP",
+ [26][1] = "OSS",
+ [27][1] = "HDP",
+ [28][1] = "VCN",
+ [29][1] = "VCNU",
+ [30][1] = "JPEG",
+};
+
+static uint32_t mmhub_v2_3_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 4, 0):
+ case IP_VERSION(2, 4, 1):
+ mmhub_cid = mmhub_client_ids_vangogh[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base));
+}
+
+static void mmhub_v2_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v2_3_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Disable AGP. */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
+
+ tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
+
+ tmp = mmMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp);
+
+ tmp = mmMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ mmhub_v2_3_init_gart_aperture_regs(adev);
+ mmhub_v2_3_init_system_aperture_regs(adev);
+ mmhub_v2_3_init_tlb_regs(adev);
+ mmhub_v2_3_init_cache_regs(adev);
+
+ mmhub_v2_3_enable_system_domain(adev);
+ mmhub_v2_3_disable_identity_aperture(adev);
+ mmhub_v2_3_setup_vmid_config(adev);
+ mmhub_v2_3_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < AMDGPU_NUM_VMID; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v2_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v2_3_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v2_3_get_invalidate_req,
+};
+
+static void mmhub_v2_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ -
+ mmMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs;
+}
+
+static void
+mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL);
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ } else {
+ data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL, data);
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+}
+
+static void
+mmhub_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1, def2, data2;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL);
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) {
+ data &= ~MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK;
+ data1 &= ~(DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK);
+ data2 &= ~(DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK);
+ } else {
+ data |= MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK;
+ data1 |= (DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK);
+ data2 |= (DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL, data);
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL, data1);
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL, data2);
+}
+
+static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v2_3_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v2_3_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data, data1, data2, data3;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ data1 = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL);
+ data2 = RREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL);
+ data3 = RREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
+ && !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) {
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+ }
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (!(data1 & MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK)
+ && !(data2 & (DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK))
+ && !(data3 & (DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
+ DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs = {
+ .init = mmhub_v2_3_init,
+ .gart_enable = mmhub_v2_3_gart_enable,
+ .set_fault_enable_default = mmhub_v2_3_set_fault_enable_default,
+ .gart_disable = mmhub_v2_3_gart_disable,
+ .set_clockgating = mmhub_v2_3_set_clockgating,
+ .get_clockgating = mmhub_v2_3_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v2_3_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h
new file mode 100644
index 000000000..2926d21de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V2_3_H__
+#define __MMHUB_V2_3_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
new file mode 100644
index 000000000..441379e91
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0.h"
+
+#include "mmhub/mmhub_3_0_0_offset.h"
+#include "mmhub/mmhub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_init_gart_aperture_regs(adev);
+ mmhub_v3_0_init_system_aperture_regs(adev);
+ mmhub_v3_0_init_tlb_regs(adev);
+ mmhub_v3_0_init_cache_regs(adev);
+
+ mmhub_v3_0_enable_system_domain(adev);
+ mmhub_v3_0_disable_identity_aperture(adev);
+ mmhub_v3_0_setup_vmid_config(adev);
+ mmhub_v3_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_get_invalidate_req,
+};
+
+static void mmhub_v3_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v3_0_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+#if 0
+ uint32_t def1, data1, def2 = 0, data2 = 0;
+#endif
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#if 0
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+ def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+#endif
+
+ if (enable) {
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ } else {
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#if 0
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+#endif
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#if 0
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+ if (def2 != data2)
+ WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+#endif
+}
+
+static void mmhub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+ mmhub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+ mmhub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+
+ return 0;
+}
+
+static void mmhub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs = {
+ .init = mmhub_v3_0_init,
+ .get_fb_location = mmhub_v3_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_gart_disable,
+ .set_clockgating = mmhub_v3_0_set_clockgating,
+ .get_clockgating = mmhub_v3_0_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
new file mode 100644
index 000000000..3ced20f35
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_H__
+#define __MMHUB_V3_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
new file mode 100644
index 000000000..12c7f4b46
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_1.h"
+
+#include "mmhub/mmhub_3_0_1_offset.h"
+#include "mmhub/mmhub_3_0_1_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_1[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_1_init_gart_aperture_regs(adev);
+ mmhub_v3_0_1_init_system_aperture_regs(adev);
+ mmhub_v3_0_1_init_tlb_regs(adev);
+ mmhub_v3_0_1_init_cache_regs(adev);
+
+ mmhub_v3_0_1_enable_system_domain(adev);
+ mmhub_v3_0_1_disable_identity_aperture(adev);
+ mmhub_v3_0_1_setup_vmid_config(adev);
+ mmhub_v3_0_1_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
+};
+
+static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
+ .init = mmhub_v3_0_1_init,
+ .get_fb_location = mmhub_v3_0_1_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_1_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_1_gart_disable,
+ .set_clockgating = mmhub_v3_0_1_set_clockgating,
+ .get_clockgating = mmhub_v3_0_1_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
new file mode 100644
index 000000000..4c1246735
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_1_H__
+#define __MMHUB_V3_0_1_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
new file mode 100644
index 000000000..5dadc85ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_2.h"
+
+#include "mmhub/mmhub_3_0_2_offset.h"
+#include "mmhub/mmhub_3_0_2_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_2[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_2_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ mmhub_cid = mmhub_client_ids_v3_0_2[cid][rw];
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_2_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_2_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * the new L1 policy will block SRIOV guest from writing
+ * these regs, and they will be programed at host.
+ * so skip programing these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ }
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_2_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_2_init_gart_aperture_regs(adev);
+ mmhub_v3_0_2_init_system_aperture_regs(adev);
+ mmhub_v3_0_2_init_tlb_regs(adev);
+ mmhub_v3_0_2_init_cache_regs(adev);
+
+ mmhub_v3_0_2_enable_system_domain(adev);
+ mmhub_v3_0_2_disable_identity_aperture(adev);
+ mmhub_v3_0_2_setup_vmid_config(adev);
+ mmhub_v3_0_2_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_2_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_2_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_2_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_2_get_invalidate_req,
+};
+
+static void mmhub_v3_0_2_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vmhub_funcs = &mmhub_v3_0_2_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_2_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_2_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+static void mmhub_v3_0_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO
+}
+
+static int mmhub_v3_0_2_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_0_2_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_0_2_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_0_2_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ //TODO
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs = {
+ .init = mmhub_v3_0_2_init,
+ .get_fb_location = mmhub_v3_0_2_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_2_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_2_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_2_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_2_gart_disable,
+ .set_clockgating = mmhub_v3_0_2_set_clockgating,
+ .get_clockgating = mmhub_v3_0_2_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_2_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
new file mode 100644
index 000000000..23ad7b156
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_2_H__
+#define __MMHUB_V3_0_2_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
new file mode 100644
index 000000000..5718e4d40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -0,0 +1,1687 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "mmhub_v9_4.h"
+
+#include "mmhub/mmhub_9_4_1_offset.h"
+#include "mmhub/mmhub_9_4_1_sh_mask.h"
+#include "mmhub/mmhub_9_4_1_default.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "soc15.h"
+#include "soc15_common.h"
+
+#define MMHUB_NUM_INSTANCES 2
+#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000
+
+static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
+{
+ /* The base should be same b/t 2 mmhubs on Acrturus. Read one here. */
+ u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP);
+
+ base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
+ return base;
+}
+
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+ uint32_t vmid, uint64_t value)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ lower_32_bits(value));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ upper_32_bits(value));
+
+}
+
+static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++)
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid,
+ page_table_base);
+}
+
+static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ adev->gmc.agp_end >> 24);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ adev->gmc.agp_start >> 24);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(value >> 12));
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ tmp);
+ }
+}
+
+static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ATC_EN, 1);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
+ INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2,
+ INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev,
+ int hubid)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+}
+
+static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
+ int hubid)
+{
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+}
+
+static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH,
+ num_level);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev,
+ int hubid)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->eng_addr_distance,
+ 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->eng_addr_distance,
+ 0x1f);
+ }
+}
+
+static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ /* GART Enable. */
+ mmhub_v9_4_init_gart_aperture_regs(adev, i);
+ mmhub_v9_4_init_system_aperture_regs(adev, i);
+ mmhub_v9_4_init_tlb_regs(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_init_cache_regs(adev, i);
+
+ mmhub_v9_4_enable_system_domain(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_disable_identity_aperture(adev, i);
+ mmhub_v9_4_setup_vmid_config(adev, i);
+ mmhub_v9_4_program_invalidation(adev, i);
+ }
+
+ return 0;
+}
+
+static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i, j;
+
+ for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {
+ /* Disable all tables */
+ for (i = 0; i < AMDGPU_NUM_VMID; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET +
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp,
+ VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+ ENABLE_L2_CACHE, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
+ j * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+ }
+}
+
+/**
+ * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp,
+ VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+ }
+}
+
+static void mmhub_v9_4_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = {
+ &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]};
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ hub[i]->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_ACK) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_CONTEXT0_CNTL) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
+
+ hub[i]->ctx_distance = mmVML2VC0_VM_CONTEXT1_CNTL -
+ mmVML2VC0_VM_CONTEXT0_CNTL;
+ hub[i]->ctx_addr_distance = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub[i]->eng_distance = mmVML2VC0_VM_INVALIDATE_ENG1_REQ -
+ mmVML2VC0_VM_INVALIDATE_ENG0_REQ;
+ hub[i]->eng_addr_distance = mmVML2VC0_VM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+ }
+}
+
+static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1;
+ int i, j;
+ int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
+
+ for (j = 0; j < 5; j++) {
+ def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_CNTL_MISC2,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET +
+ j * dist);
+ if (enable &&
+ (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data1 &=
+ ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ } else {
+ data1 |=
+ (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def1 != data1)
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_CNTL_MISC2,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET +
+ j * dist, data1);
+
+ if (i == 1 && j == 3)
+ break;
+ }
+ }
+}
+
+static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+ def = data = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG,
+ i * MMHUB_INSTANCE_REGISTER_OFFSET, data);
+ }
+}
+
+static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ mmhub_v9_4_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v9_4_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG);
+
+ if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = {
+ /* MMHUB Range 0 */
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 1 */
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHAB Range 2*/
+ { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Rang 3 */
+ { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 4 */
+ { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUAB Range 5 */
+ { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 6 */
+ { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 7*/
+ { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 },
+};
+
+static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev,
+ const struct soc15_reg_entry *reg,
+ uint32_t value,
+ uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
+ if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v9_4_ras_fields[i].sec_count_mask) >>
+ mmhub_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v9_4_ras_fields[i].ded_count_mask) >>
+ mmhub_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ dev_info(adev->dev, "MMHUB SubBlock %s, DED %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v9_4_get_ras_error_count(adev, &mmhub_v9_4_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+static void mmhub_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* read back edc counter registers to reset the counters to 0 */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+ }
+}
+
+static const struct soc15_reg_entry mmhub_v9_4_err_status_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_ERR_STATUS), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_ERR_STATUS), 0, 0, 0 },
+};
+
+static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_err_status_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_err_status_regs[i]));
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ /* SDP read/write error/parity error in FUE_IS_FATAL mode
+ * can cause system fatal error in arcturas. Harvest the error
+ * status before GPU reset */
+ dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
+ i, reg_value);
+ }
+ }
+}
+
+const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v9_4_query_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v9_4_ras_hw_ops,
+ },
+};
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
+ .get_fb_location = mmhub_v9_4_get_fb_location,
+ .init = mmhub_v9_4_init,
+ .gart_enable = mmhub_v9_4_gart_enable,
+ .set_fault_enable_default = mmhub_v9_4_set_fault_enable_default,
+ .gart_disable = mmhub_v9_4_gart_disable,
+ .set_clockgating = mmhub_v9_4_set_clockgating,
+ .get_clockgating = mmhub_v9_4_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v9_4_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
new file mode 100644
index 000000000..a48329d95
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V9_4_H__
+#define __MMHUB_V9_4_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+extern struct amdgpu_mmhub_ras mmhub_v9_4_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
new file mode 100644
index 000000000..2cdab8062
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V1_0_H__
+#define __MMSCH_V1_0_H__
+
+#define MMSCH_VERSION 0x1
+
+enum mmsch_v1_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v1_0_init_header {
+ uint32_t version;
+ uint32_t header_size;
+ uint32_t vce_init_status;
+ uint32_t uvd_init_status;
+ uint32_t vce_table_offset;
+ uint32_t vce_table_size;
+ uint32_t uvd_table_offset;
+ uint32_t uvd_table_size;
+};
+
+struct mmsch_vf_eng_init_header {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v1_1_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_vf_eng_init_header eng[2];
+};
+
+struct mmsch_v1_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v1_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v1_0_cmd_direct_write {
+ struct mmsch_v1_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v1_0_cmd_direct_read_modify_write {
+ struct mmsch_v1_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v1_0_cmd_direct_polling {
+ struct mmsch_v1_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v1_0_cmd_end {
+ struct mmsch_v1_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v1_0_cmd_indirect_write {
+ struct mmsch_v1_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+static inline void mmsch_v1_0_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t value)
+{
+ direct_wt->cmd_header.reg_offset = reg_offset;
+ direct_wt->reg_value = value;
+ memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t mask, uint32_t data)
+{
+ direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
+ direct_rd_mod_wt->mask_value = mask;
+ direct_rd_mod_wt->write_data = data;
+ memcpy((void *)init_table, direct_rd_mod_wt,
+ sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
+}
+
+static inline void mmsch_v1_0_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t mask, uint32_t wait)
+{
+ direct_poll->cmd_header.reg_offset = reg_offset;
+ direct_poll->mask_value = mask;
+ direct_poll->wait_value = wait;
+ memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ mmsch_v1_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
+ init_table, (reg), \
+ (mask), (data)); \
+ init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+ table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_WT(reg, value) { \
+ mmsch_v1_0_insert_direct_wt(&direct_wt, \
+ init_table, (reg), \
+ (value)); \
+ init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+ table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
+}
+
+#define MMSCH_V1_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ mmsch_v1_0_insert_direct_poll(&direct_poll, \
+ init_table, (reg), \
+ (mask), (wait)); \
+ init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+ table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h
new file mode 100644
index 000000000..1b5086c7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v2_0.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V2_0_H__
+#define __MMSCH_V2_0_H__
+
+// addressBlock: uvd0_mmsch_dec
+// base address: 0x1e000
+#define mmMMSCH_UCODE_ADDR 0x0000
+#define mmMMSCH_UCODE_ADDR_BASE_IDX 0
+#define mmMMSCH_UCODE_DATA 0x0001
+#define mmMMSCH_UCODE_DATA_BASE_IDX 0
+#define mmMMSCH_SRAM_ADDR 0x0002
+#define mmMMSCH_SRAM_ADDR_BASE_IDX 0
+#define mmMMSCH_SRAM_DATA 0x0003
+#define mmMMSCH_SRAM_DATA_BASE_IDX 0
+#define mmMMSCH_VF_SRAM_OFFSET 0x0004
+#define mmMMSCH_VF_SRAM_OFFSET_BASE_IDX 0
+#define mmMMSCH_DB_SRAM_OFFSET 0x0005
+#define mmMMSCH_DB_SRAM_OFFSET_BASE_IDX 0
+#define mmMMSCH_CTX_SRAM_OFFSET 0x0006
+#define mmMMSCH_CTX_SRAM_OFFSET_BASE_IDX 0
+#define mmMMSCH_CTL 0x0007
+#define mmMMSCH_CTL_BASE_IDX 0
+#define mmMMSCH_INTR 0x0008
+#define mmMMSCH_INTR_BASE_IDX 0
+#define mmMMSCH_INTR_ACK 0x0009
+#define mmMMSCH_INTR_ACK_BASE_IDX 0
+#define mmMMSCH_INTR_STATUS 0x000a
+#define mmMMSCH_INTR_STATUS_BASE_IDX 0
+#define mmMMSCH_VF_VMID 0x000b
+#define mmMMSCH_VF_VMID_BASE_IDX 0
+#define mmMMSCH_VF_CTX_ADDR_LO 0x000c
+#define mmMMSCH_VF_CTX_ADDR_LO_BASE_IDX 0
+#define mmMMSCH_VF_CTX_ADDR_HI 0x000d
+#define mmMMSCH_VF_CTX_ADDR_HI_BASE_IDX 0
+#define mmMMSCH_VF_CTX_SIZE 0x000e
+#define mmMMSCH_VF_CTX_SIZE_BASE_IDX 0
+#define mmMMSCH_VF_GPCOM_ADDR_LO 0x000f
+#define mmMMSCH_VF_GPCOM_ADDR_LO_BASE_IDX 0
+#define mmMMSCH_VF_GPCOM_ADDR_HI 0x0010
+#define mmMMSCH_VF_GPCOM_ADDR_HI_BASE_IDX 0
+#define mmMMSCH_VF_GPCOM_SIZE 0x0011
+#define mmMMSCH_VF_GPCOM_SIZE_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_HOST 0x0012
+#define mmMMSCH_VF_MAILBOX_HOST_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_RESP 0x0013
+#define mmMMSCH_VF_MAILBOX_RESP_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_0 0x0014
+#define mmMMSCH_VF_MAILBOX_0_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_0_RESP 0x0015
+#define mmMMSCH_VF_MAILBOX_0_RESP_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_1 0x0016
+#define mmMMSCH_VF_MAILBOX_1_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX_1_RESP 0x0017
+#define mmMMSCH_VF_MAILBOX_1_RESP_BASE_IDX 0
+#define mmMMSCH_CNTL 0x001c
+#define mmMMSCH_CNTL_BASE_IDX 0
+#define mmMMSCH_NONCACHE_OFFSET0 0x001d
+#define mmMMSCH_NONCACHE_OFFSET0_BASE_IDX 0
+#define mmMMSCH_NONCACHE_SIZE0 0x001e
+#define mmMMSCH_NONCACHE_SIZE0_BASE_IDX 0
+#define mmMMSCH_NONCACHE_OFFSET1 0x001f
+#define mmMMSCH_NONCACHE_OFFSET1_BASE_IDX 0
+#define mmMMSCH_NONCACHE_SIZE1 0x0020
+#define mmMMSCH_NONCACHE_SIZE1_BASE_IDX 0
+#define mmMMSCH_PDEBUG_STATUS 0x0021
+#define mmMMSCH_PDEBUG_STATUS_BASE_IDX 0
+#define mmMMSCH_PDEBUG_DATA_32UPPERBITS 0x0022
+#define mmMMSCH_PDEBUG_DATA_32UPPERBITS_BASE_IDX 0
+#define mmMMSCH_PDEBUG_DATA_32LOWERBITS 0x0023
+#define mmMMSCH_PDEBUG_DATA_32LOWERBITS_BASE_IDX 0
+#define mmMMSCH_PDEBUG_EPC 0x0024
+#define mmMMSCH_PDEBUG_EPC_BASE_IDX 0
+#define mmMMSCH_PDEBUG_EXCCAUSE 0x0025
+#define mmMMSCH_PDEBUG_EXCCAUSE_BASE_IDX 0
+#define mmMMSCH_PROC_STATE1 0x0026
+#define mmMMSCH_PROC_STATE1_BASE_IDX 0
+#define mmMMSCH_LAST_MC_ADDR 0x0027
+#define mmMMSCH_LAST_MC_ADDR_BASE_IDX 0
+#define mmMMSCH_LAST_MEM_ACCESS_HI 0x0028
+#define mmMMSCH_LAST_MEM_ACCESS_HI_BASE_IDX 0
+#define mmMMSCH_LAST_MEM_ACCESS_LO 0x0029
+#define mmMMSCH_LAST_MEM_ACCESS_LO_BASE_IDX 0
+#define mmMMSCH_IOV_ACTIVE_FCN_ID 0x002a
+#define mmMMSCH_IOV_ACTIVE_FCN_ID_BASE_IDX 0
+#define mmMMSCH_SCRATCH_0 0x002b
+#define mmMMSCH_SCRATCH_0_BASE_IDX 0
+#define mmMMSCH_SCRATCH_1 0x002c
+#define mmMMSCH_SCRATCH_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_0 0x002d
+#define mmMMSCH_GPUIOV_SCH_BLOCK_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_CONTROL_0 0x002e
+#define mmMMSCH_GPUIOV_CMD_CONTROL_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_0 0x002f
+#define mmMMSCH_GPUIOV_CMD_STATUS_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_0 0x0030
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_0 0x0031
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_0 0x0032
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW6_0 0x0033
+#define mmMMSCH_GPUIOV_DW6_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW7_0 0x0034
+#define mmMMSCH_GPUIOV_DW7_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW8_0 0x0035
+#define mmMMSCH_GPUIOV_DW8_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_1 0x0036
+#define mmMMSCH_GPUIOV_SCH_BLOCK_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_CONTROL_1 0x0037
+#define mmMMSCH_GPUIOV_CMD_CONTROL_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_1 0x0038
+#define mmMMSCH_GPUIOV_CMD_STATUS_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_1 0x0039
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_1 0x003a
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_1 0x003b
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW6_1 0x003c
+#define mmMMSCH_GPUIOV_DW6_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW7_1 0x003d
+#define mmMMSCH_GPUIOV_DW7_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW8_1 0x003e
+#define mmMMSCH_GPUIOV_DW8_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CNTXT 0x003f
+#define mmMMSCH_GPUIOV_CNTXT_BASE_IDX 0
+#define mmMMSCH_SCRATCH_2 0x0040
+#define mmMMSCH_SCRATCH_2_BASE_IDX 0
+#define mmMMSCH_SCRATCH_3 0x0041
+#define mmMMSCH_SCRATCH_3_BASE_IDX 0
+#define mmMMSCH_SCRATCH_4 0x0042
+#define mmMMSCH_SCRATCH_4_BASE_IDX 0
+#define mmMMSCH_SCRATCH_5 0x0043
+#define mmMMSCH_SCRATCH_5_BASE_IDX 0
+#define mmMMSCH_SCRATCH_6 0x0044
+#define mmMMSCH_SCRATCH_6_BASE_IDX 0
+#define mmMMSCH_SCRATCH_7 0x0045
+#define mmMMSCH_SCRATCH_7_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_HEAD_0 0x0046
+#define mmMMSCH_VFID_FIFO_HEAD_0_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_TAIL_0 0x0047
+#define mmMMSCH_VFID_FIFO_TAIL_0_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_HEAD_1 0x0048
+#define mmMMSCH_VFID_FIFO_HEAD_1_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_TAIL_1 0x0049
+#define mmMMSCH_VFID_FIFO_TAIL_1_BASE_IDX 0
+#define mmMMSCH_NACK_STATUS 0x004a
+#define mmMMSCH_NACK_STATUS_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX0_DATA 0x004b
+#define mmMMSCH_VF_MAILBOX0_DATA_BASE_IDX 0
+#define mmMMSCH_VF_MAILBOX1_DATA 0x004c
+#define mmMMSCH_VF_MAILBOX1_DATA_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_0 0x004d
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_0 0x004e
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_0 0x004f
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_0_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_1 0x0050
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_1 0x0051
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_1 0x0052
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_1_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CNTXT_IP 0x0053
+#define mmMMSCH_GPUIOV_CNTXT_IP_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_2 0x0054
+#define mmMMSCH_GPUIOV_SCH_BLOCK_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_CONTROL_2 0x0055
+#define mmMMSCH_GPUIOV_CMD_CONTROL_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_2 0x0056
+#define mmMMSCH_GPUIOV_CMD_STATUS_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_2 0x0057
+#define mmMMSCH_GPUIOV_VM_BUSY_STATUS_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_2 0x0058
+#define mmMMSCH_GPUIOV_ACTIVE_FCNS_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_2 0x0059
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW6_2 0x005a
+#define mmMMSCH_GPUIOV_DW6_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW7_2 0x005b
+#define mmMMSCH_GPUIOV_DW7_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_DW8_2 0x005c
+#define mmMMSCH_GPUIOV_DW8_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_2 0x005d
+#define mmMMSCH_GPUIOV_SCH_BLOCK_IP_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_2 0x005e
+#define mmMMSCH_GPUIOV_CMD_STATUS_IP_2_BASE_IDX 0
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_2 0x005f
+#define mmMMSCH_GPUIOV_ACTIVE_FCN_ID_IP_2_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_HEAD_2 0x0060
+#define mmMMSCH_VFID_FIFO_HEAD_2_BASE_IDX 0
+#define mmMMSCH_VFID_FIFO_TAIL_2 0x0061
+#define mmMMSCH_VFID_FIFO_TAIL_2_BASE_IDX 0
+#define mmMMSCH_VM_BUSY_STATUS_0 0x0062
+#define mmMMSCH_VM_BUSY_STATUS_0_BASE_IDX 0
+#define mmMMSCH_VM_BUSY_STATUS_1 0x0063
+#define mmMMSCH_VM_BUSY_STATUS_1_BASE_IDX 0
+#define mmMMSCH_VM_BUSY_STATUS_2 0x0064
+#define mmMMSCH_VM_BUSY_STATUS_2_BASE_IDX 0
+
+#define MMSCH_VERSION_MAJOR 2
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+enum mmsch_v2_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v2_0_init_header {
+ uint32_t version;
+ uint32_t header_size;
+ uint32_t vcn_init_status;
+ uint32_t vcn_table_offset;
+ uint32_t vcn_table_size;
+};
+
+struct mmsch_v2_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v2_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v2_0_cmd_direct_write {
+ struct mmsch_v2_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v2_0_cmd_direct_read_modify_write {
+ struct mmsch_v2_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v2_0_cmd_direct_polling {
+ struct mmsch_v2_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v2_0_cmd_end {
+ struct mmsch_v2_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v2_0_cmd_indirect_write {
+ struct mmsch_v2_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+static inline void mmsch_v2_0_insert_direct_wt(struct mmsch_v2_0_cmd_direct_write *direct_wt,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t value)
+{
+ direct_wt->cmd_header.reg_offset = reg_offset;
+ direct_wt->reg_value = value;
+ memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v2_0_cmd_direct_write));
+}
+
+static inline void mmsch_v2_0_insert_direct_rd_mod_wt(struct mmsch_v2_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t mask, uint32_t data)
+{
+ direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
+ direct_rd_mod_wt->mask_value = mask;
+ direct_rd_mod_wt->write_data = data;
+ memcpy((void *)init_table, direct_rd_mod_wt,
+ sizeof(struct mmsch_v2_0_cmd_direct_read_modify_write));
+}
+
+static inline void mmsch_v2_0_insert_direct_poll(struct mmsch_v2_0_cmd_direct_polling *direct_poll,
+ uint32_t *init_table,
+ uint32_t reg_offset,
+ uint32_t mask, uint32_t wait)
+{
+ direct_poll->cmd_header.reg_offset = reg_offset;
+ direct_poll->mask_value = mask;
+ direct_poll->wait_value = wait;
+ memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v2_0_cmd_direct_polling));
+}
+
+#define MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ mmsch_v2_0_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
+ init_table, (reg), \
+ (mask), (data)); \
+ init_table += sizeof(struct mmsch_v2_0_cmd_direct_read_modify_write)/4; \
+ table_size += sizeof(struct mmsch_v2_0_cmd_direct_read_modify_write)/4; \
+}
+
+#define MMSCH_V2_0_INSERT_DIRECT_WT(reg, value) { \
+ mmsch_v2_0_insert_direct_wt(&direct_wt, \
+ init_table, (reg), \
+ (value)); \
+ init_table += sizeof(struct mmsch_v2_0_cmd_direct_write)/4; \
+ table_size += sizeof(struct mmsch_v2_0_cmd_direct_write)/4; \
+}
+
+#define MMSCH_V2_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ mmsch_v2_0_insert_direct_poll(&direct_poll, \
+ init_table, (reg), \
+ (mask), (wait)); \
+ init_table += sizeof(struct mmsch_v2_0_cmd_direct_polling)/4; \
+ table_size += sizeof(struct mmsch_v2_0_cmd_direct_polling)/4; \
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
new file mode 100644
index 000000000..a773ef61b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V3_0_H__
+#define __MMSCH_V3_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 3
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define MMSCH_V3_0_VCN_INSTANCES 0x2
+
+enum mmsch_v3_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v3_0_table_info {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v3_0_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v3_0_table_info inst[MMSCH_V3_0_VCN_INSTANCES];
+};
+
+struct mmsch_v3_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v3_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v3_0_cmd_direct_write {
+ struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v3_0_cmd_direct_read_modify_write {
+ struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v3_0_cmd_direct_polling {
+ struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v3_0_cmd_end {
+ struct mmsch_v3_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v3_0_cmd_indirect_write {
+ struct mmsch_v3_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ size = sizeof(struct mmsch_v3_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V3_0_INSERT_DIRECT_WT(reg, value) { \
+ size = sizeof(struct mmsch_v3_0_cmd_direct_write); \
+ size_dw = size / 4; \
+ direct_wt.cmd_header.reg_offset = reg; \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V3_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ size = sizeof(struct mmsch_v3_0_cmd_direct_polling); \
+ size_dw = size / 4; \
+ direct_poll.cmd_header.reg_offset = reg; \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V3_0_INSERT_END() { \
+ size = sizeof(struct mmsch_v3_0_cmd_end); \
+ size_dw = size / 4; \
+ memcpy((void *)table_loc, &end, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
new file mode 100644
index 000000000..796d4f879
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_H__
+#define __MMSCH_V4_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 4
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define RB_ENABLED (1 << 0)
+#define RB4_ENABLED (1 << 1)
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+
+#define MMSCH_V4_0_VCN_INSTANCES 0x2
+
+enum mmsch_v4_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v4_0_table_info {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v4_0_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info inst[MMSCH_V4_0_VCN_INSTANCES];
+ struct mmsch_v4_0_table_info jpegdec;
+};
+
+struct mmsch_v4_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v4_0_cmd_direct_write {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v4_0_cmd_direct_read_modify_write {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v4_0_cmd_direct_polling {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v4_0_cmd_end {
+ struct mmsch_v4_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v4_0_cmd_indirect_write {
+ struct mmsch_v4_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_DIRECT_WT(reg, value) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_write); \
+ size_dw = size / 4; \
+ direct_wt.cmd_header.reg_offset = reg; \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ size = sizeof(struct mmsch_v4_0_cmd_direct_polling); \
+ size_dw = size / 4; \
+ direct_poll.cmd_header.reg_offset = reg; \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V4_0_INSERT_END() { \
+ size = sizeof(struct mmsch_v4_0_cmd_end); \
+ size_dw = size / 4; \
+ memcpy((void *)table_loc, &end, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000..db7eb5260
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v4_0_table_info vcn0;
+ struct mmsch_v4_0_table_info mjpegdec0[4];
+ struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
new file mode 100644
index 000000000..63725b2eb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "nbio/nbio_6_1_offset.h"
+#include "nbio/nbio_6_1_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "mp/mp_9_0_offset.h"
+#include "soc15.h"
+#include "vega10_ih.h"
+#include "soc15_common.h"
+#include "mxgpu_ai.h"
+
+#include "amdgpu_reset.h"
+
+static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
+{
+ WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
+}
+
+static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+ WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
+
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+ return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
+}
+
+
+static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
+ enum idh_event event)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
+ if (reg != event)
+ return -ENOENT;
+
+ xgpu_ai_mailbox_send_ack(adev);
+
+ return 0;
+}
+
+static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev) {
+ return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
+static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
+{
+ int timeout = AI_MAILBOX_POLL_ACK_TIMEDOUT;
+ u8 reg;
+
+ do {
+ reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+ if (reg & 2)
+ return 0;
+
+ mdelay(5);
+ timeout -= 5;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
+
+ return -ETIME;
+}
+
+static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+ int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT;
+
+ do {
+ r = xgpu_ai_mailbox_rcv_msg(adev, event);
+ if (!r)
+ return 0;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+ return -ETIME;
+}
+
+static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3) {
+ u32 reg;
+ int r;
+ uint8_t trn;
+
+ /* IMPORTANT:
+ * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK
+ * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK
+ * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_ai_poll_ack()
+ * will return immediatly
+ */
+ do {
+ xgpu_ai_mailbox_set_valid(adev, false);
+ trn = xgpu_ai_peek_ack(adev);
+ if (trn) {
+ pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ msleep(1);
+ }
+ } while(trn);
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
+ reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
+ MSGBUF_DATA, req);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
+ reg);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1),
+ data1);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2),
+ data2);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3),
+ data3);
+
+ xgpu_ai_mailbox_set_valid(adev, true);
+
+ /* start to poll ack */
+ r = xgpu_ai_poll_ack(adev);
+ if (r)
+ pr_err("Doesn't get ack from pf, continue\n");
+
+ xgpu_ai_mailbox_set_valid(adev, false);
+}
+
+static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ int r;
+
+ xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ /* start to check msg if request is idh_req_gpu_init_access */
+ if (req == IDH_REQ_GPU_INIT_ACCESS ||
+ req == IDH_REQ_GPU_FINI_ACCESS ||
+ req == IDH_REQ_GPU_RESET_ACCESS) {
+ r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+ if (r) {
+ pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ return r;
+ }
+ /* Retrieve checksum from mailbox2 */
+ if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
+ adev->virt.fw_reserve.checksum_key =
+ RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
+ }
+ } else if (req == IDH_REQ_GPU_INIT_DATA){
+ /* Dummy REQ_GPU_INIT_DATA handling */
+ r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
+ /* version set to 0 since dummy */
+ adev->virt.req_init_data_ver = 0;
+ }
+
+ return 0;
+}
+
+static int xgpu_ai_request_reset(struct amdgpu_device *adev)
+{
+ int ret, i = 0;
+
+ while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+ ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+ if (!ret)
+ break;
+ i++;
+ }
+
+ return ret;
+}
+
+static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+
+ req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+ return xgpu_ai_send_access_requests(adev, req);
+}
+
+static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+ int r = 0;
+
+ req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+ r = xgpu_ai_send_access_requests(adev, req);
+
+ return r;
+}
+
+static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("get ack intr and do nothing.\n");
+ return 0;
+}
+
+static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+
+ /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+ * otherwise the mailbox msg will be ruined/reseted by
+ * the VF FLR.
+ */
+ if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
+ return;
+
+ down_write(&adev->reset_domain->sem);
+
+ amdgpu_virt_fini_data_exchange(adev);
+
+ xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+
+ do {
+ if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+ goto flr_done;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+flr_done:
+ atomic_set(&adev->reset_domain->in_gpu_reset, 0);
+ up_write(&adev->reset_domain->sem);
+
+ /* Trigger recovery for world switch failure if no TDR */
+ if (amdgpu_device_should_recover_gpu(adev)
+ && (!amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+
+ switch (event) {
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_QUERY_ALIVE:
+ xgpu_ai_mailbox_send_ack(adev);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = {
+ .set = xgpu_ai_set_mailbox_ack_irq,
+ .process = xgpu_ai_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = {
+ .set = xgpu_ai_set_mailbox_rcv_irq,
+ .process = xgpu_ai_mailbox_rcv_irq,
+};
+
+void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->virt.ack_irq.num_types = 1;
+ adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs;
+ adev->virt.rcv_irq.num_types = 1;
+ adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ return 0;
+}
+
+int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+ if (r)
+ return r;
+ r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+
+ return 0;
+}
+
+void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
+{
+ amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev)
+{
+ xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
+}
+
+const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
+ .req_full_gpu = xgpu_ai_request_full_gpu_access,
+ .rel_full_gpu = xgpu_ai_release_full_gpu_access,
+ .reset_gpu = xgpu_ai_request_reset,
+ .wait_reset = NULL,
+ .trans_msg = xgpu_ai_mailbox_trans_msg,
+ .req_init_data = xgpu_ai_request_init_data,
+ .ras_poison_handler = xgpu_ai_ras_poison_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
new file mode 100644
index 000000000..af1a78469
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MXGPU_AI_H__
+#define __MXGPU_AI_H__
+
+#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT 6000
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT 10000
+#define AI_MAILBOX_POLL_MSG_REP_MAX 11
+
+enum idh_request {
+ IDH_REQ_GPU_INIT_ACCESS = 1,
+ IDH_REL_GPU_INIT_ACCESS,
+ IDH_REQ_GPU_FINI_ACCESS,
+ IDH_REL_GPU_FINI_ACCESS,
+ IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
+
+ IDH_LOG_VF_ERROR = 200,
+ IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+};
+
+enum idh_event {
+ IDH_CLR_MSG_BUF = 0,
+ IDH_READY_TO_ACCESS_GPU,
+ IDH_FLR_NOTIFICATION,
+ IDH_FLR_NOTIFICATION_CMPL,
+ IDH_SUCCESS,
+ IDH_FAIL,
+ IDH_QUERY_ALIVE,
+ IDH_REQ_GPU_INIT_DATA_READY,
+
+ IDH_TEXT_MESSAGE = 255,
+};
+
+extern const struct amdgpu_virt_ops xgpu_ai_virt_ops;
+
+void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
+
+#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
+#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
new file mode 100644
index 000000000..6a68ee946
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "soc15.h"
+#include "navi10_ih.h"
+#include "soc15_common.h"
+#include "mxgpu_nv.h"
+
+#include "amdgpu_reset.h"
+
+static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
+{
+ WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
+}
+
+static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+ WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
+
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+ return RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
+}
+
+
+static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
+ enum idh_event event)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
+ if (reg != event)
+ return -ENOENT;
+
+ xgpu_nv_mailbox_send_ack(adev);
+
+ return 0;
+}
+
+static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
+{
+ return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
+static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT;
+ u8 reg;
+
+ do {
+ reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+ if (reg & 2)
+ return 0;
+
+ mdelay(5);
+ timeout -= 5;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+
+ return -ETIME;
+}
+
+static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+ int r;
+ uint64_t timeout, now;
+
+ now = (uint64_t)ktime_to_ms(ktime_get());
+ timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
+
+ do {
+ r = xgpu_nv_mailbox_rcv_msg(adev, event);
+ if (!r)
+ return 0;
+
+ msleep(10);
+ now = (uint64_t)ktime_to_ms(ktime_get());
+ } while (timeout > now);
+
+
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
+{
+ int r;
+ uint8_t trn;
+
+ /* IMPORTANT:
+ * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK
+ * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK
+ * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack()
+ * will return immediatly
+ */
+ do {
+ xgpu_nv_mailbox_set_valid(adev, false);
+ trn = xgpu_nv_peek_ack(adev);
+ if (trn) {
+ pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ msleep(1);
+ }
+ } while (trn);
+
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW3, data3);
+ xgpu_nv_mailbox_set_valid(adev, true);
+
+ /* start to poll ack */
+ r = xgpu_nv_poll_ack(adev);
+ if (r)
+ pr_err("Doesn't get ack from pf, continue\n");
+
+ xgpu_nv_mailbox_set_valid(adev, false);
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ int r, retry = 1;
+ enum idh_event event = -1;
+
+send_request:
+ xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ switch (req) {
+ case IDH_REQ_GPU_INIT_ACCESS:
+ case IDH_REQ_GPU_FINI_ACCESS:
+ case IDH_REQ_GPU_RESET_ACCESS:
+ event = IDH_READY_TO_ACCESS_GPU;
+ break;
+ case IDH_REQ_GPU_INIT_DATA:
+ event = IDH_REQ_GPU_INIT_DATA_READY;
+ break;
+ default:
+ break;
+ }
+
+ if (event != -1) {
+ r = xgpu_nv_poll_msg(adev, event);
+ if (r) {
+ if (retry++ < 2)
+ goto send_request;
+
+ if (req != IDH_REQ_GPU_INIT_DATA) {
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ return r;
+ } else /* host doesn't support REQ_GPU_INIT_DATA handshake */
+ adev->virt.req_init_data_ver = 0;
+ } else {
+ if (req == IDH_REQ_GPU_INIT_DATA) {
+ adev->virt.req_init_data_ver =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
+
+ /* assume V1 in case host doesn't set version number */
+ if (adev->virt.req_init_data_ver < 1)
+ adev->virt.req_init_data_ver = 1;
+ }
+ }
+
+ /* Retrieve checksum from mailbox2 */
+ if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
+ adev->virt.fw_reserve.checksum_key =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+ }
+ }
+
+ return 0;
+}
+
+static int xgpu_nv_request_reset(struct amdgpu_device *adev)
+{
+ int ret, i = 0;
+
+ while (i < NV_MAILBOX_POLL_MSG_REP_MAX) {
+ ret = xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+ if (!ret)
+ break;
+ i++;
+ }
+
+ return ret;
+}
+
+static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+
+ req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+ return xgpu_nv_send_access_requests(adev, req);
+}
+
+static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+ int r = 0;
+
+ req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+ r = xgpu_nv_send_access_requests(adev, req);
+
+ return r;
+}
+
+static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+}
+
+static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("get ack intr and do nothing.\n");
+ return 0;
+}
+
+static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ tmp |= 2;
+ else
+ tmp &= ~2;
+
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
+
+ return 0;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+
+ /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+ * otherwise the mailbox msg will be ruined/reseted by
+ * the VF FLR.
+ */
+ if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
+ return;
+
+ down_write(&adev->reset_domain->sem);
+
+ amdgpu_virt_fini_data_exchange(adev);
+
+ xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+
+ do {
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+ goto flr_done;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+flr_done:
+ atomic_set(&adev->reset_domain->in_gpu_reset, 0);
+ up_write(&adev->reset_domain->sem);
+
+ /* Trigger recovery for world switch failure if no TDR */
+ if (amdgpu_device_should_recover_gpu(adev)
+ && (!amdgpu_device_has_job_running(adev) ||
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ tmp |= 1;
+ else
+ tmp &= ~1;
+
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
+
+ return 0;
+}
+
+static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+
+ switch (event) {
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_ack_irq,
+ .process = xgpu_nv_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_rcv_irq,
+ .process = xgpu_nv_mailbox_rcv_irq,
+};
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->virt.ack_irq.num_types = 1;
+ adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs;
+ adev->virt.rcv_irq.num_types = 1;
+ adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ return 0;
+}
+
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+ if (r)
+ return r;
+ r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+
+ return 0;
+}
+
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
+{
+ amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev)
+{
+ xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+}
+
+const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
+ .req_full_gpu = xgpu_nv_request_full_gpu_access,
+ .rel_full_gpu = xgpu_nv_release_full_gpu_access,
+ .req_init_data = xgpu_nv_request_init_data,
+ .reset_gpu = xgpu_nv_request_reset,
+ .wait_reset = NULL,
+ .trans_msg = xgpu_nv_mailbox_trans_msg,
+ .ras_poison_handler = xgpu_nv_ras_poison_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
new file mode 100644
index 000000000..d0221ce08
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MXGPU_NV_H__
+#define __MXGPU_NV_H__
+
+#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define NV_MAILBOX_POLL_MSG_REP_MAX 11
+
+enum idh_request {
+ IDH_REQ_GPU_INIT_ACCESS = 1,
+ IDH_REL_GPU_INIT_ACCESS,
+ IDH_REQ_GPU_FINI_ACCESS,
+ IDH_REL_GPU_FINI_ACCESS,
+ IDH_REQ_GPU_RESET_ACCESS,
+ IDH_REQ_GPU_INIT_DATA,
+
+ IDH_LOG_VF_ERROR = 200,
+ IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
+};
+
+enum idh_event {
+ IDH_CLR_MSG_BUF = 0,
+ IDH_READY_TO_ACCESS_GPU,
+ IDH_FLR_NOTIFICATION,
+ IDH_FLR_NOTIFICATION_CMPL,
+ IDH_SUCCESS,
+ IDH_FAIL,
+ IDH_QUERY_ALIVE,
+ IDH_REQ_GPU_INIT_DATA_READY,
+
+ IDH_TEXT_MESSAGE = 255,
+};
+
+extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev);
+
+#define mmMAILBOX_CONTROL 0xE5E
+
+#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (mmMAILBOX_CONTROL * 4)
+#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE + 1)
+
+#define mmMAILBOX_MSGBUF_TRN_DW0 0xE56
+#define mmMAILBOX_MSGBUF_TRN_DW1 0xE57
+#define mmMAILBOX_MSGBUF_TRN_DW2 0xE58
+#define mmMAILBOX_MSGBUF_TRN_DW3 0xE59
+
+#define mmMAILBOX_MSGBUF_RCV_DW0 0xE5A
+#define mmMAILBOX_MSGBUF_RCV_DW1 0xE5B
+#define mmMAILBOX_MSGBUF_RCV_DW2 0xE5C
+#define mmMAILBOX_MSGBUF_RCV_DW3 0xE5D
+
+#define mmMAILBOX_INT_CNTL 0xE5F
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
new file mode 100644
index 000000000..59f53c743
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -0,0 +1,638 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Xiangliang.Yu@amd.com
+ */
+
+#include "amdgpu.h"
+#include "vi.h"
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+#include "vid.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gmc_v8_0.h"
+#include "gfx_v8_0.h"
+#include "sdma_v3_0.h"
+#include "tonga_ih.h"
+#include "gmc/gmc_8_2_d.h"
+#include "gmc/gmc_8_2_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+#include "smu/smu_7_1_3_d.h"
+#include "mxgpu_vi.h"
+
+#include "amdgpu_reset.h"
+
+/* VI golden setting */
+static const u32 xgpu_fiji_mgcg_cgcg_init[] = {
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+ mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmSMC_IND_INDEX_4, 0xffffffff, 0xC060000C,
+ mmSMC_IND_DATA_4, 0xc0000fff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100,
+};
+
+static const u32 xgpu_fiji_golden_settings_a10[] = {
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 xgpu_fiji_golden_common_all[] = {
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
+};
+
+static const u32 xgpu_tonga_mgcg_cgcg_init[] = {
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
+ mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
+ mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
+ mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
+ mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+ mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmSMC_IND_INDEX_4, 0xffffffff, 0xC060000C,
+ mmSMC_IND_DATA_4, 0xc0000fff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100,
+};
+
+static const u32 xgpu_tonga_golden_settings_a11[] = {
+ mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+ mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmGB_GPU_ID, 0x0000000f, 0x00000000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+ mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000,
+ mmMC_HUB_RDREQ_DMIF_LIMIT, 0x0000007f, 0x00000028,
+ mmMC_HUB_WDP_UMC, 0x00007fb6, 0x00000991,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 xgpu_tonga_golden_common_all[] = {
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
+ mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
+};
+
+void xgpu_vi_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(
+ xgpu_fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_fiji_golden_settings_a10,
+ ARRAY_SIZE(
+ xgpu_fiji_golden_settings_a10));
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_fiji_golden_common_all,
+ ARRAY_SIZE(
+ xgpu_fiji_golden_common_all));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(
+ xgpu_tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_tonga_golden_settings_a11,
+ ARRAY_SIZE(
+ xgpu_tonga_golden_settings_a11));
+ amdgpu_device_program_register_sequence(adev,
+ xgpu_tonga_golden_common_all,
+ ARRAY_SIZE(
+ xgpu_tonga_golden_common_all));
+ break;
+ default:
+ BUG_ON("Doesn't support chip type.\n");
+ break;
+ }
+}
+
+/*
+ * Mailbox communication between GPU hypervisor and VFs
+ */
+static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev)
+{
+ u32 reg;
+ int timeout = VI_MAILBOX_TIMEDOUT;
+ u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID);
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, RCV_MSG_ACK, 1);
+ WREG32_NO_KIQ(mmMAILBOX_CONTROL, reg);
+
+ /*Wait for RCV_MSG_VALID to be 0*/
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ while (reg & mask) {
+ if (timeout <= 0) {
+ pr_err("RCV_MSG_VALID is not cleared\n");
+ break;
+ }
+ mdelay(1);
+ timeout -= 1;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ }
+}
+
+static void xgpu_vi_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ reg = REG_SET_FIELD(reg, MAILBOX_CONTROL,
+ TRN_MSG_VALID, val ? 1 : 0);
+ WREG32_NO_KIQ(mmMAILBOX_CONTROL, reg);
+}
+
+static void xgpu_vi_mailbox_trans_msg(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0);
+ reg = REG_SET_FIELD(reg, MAILBOX_MSGBUF_TRN_DW0,
+ MSGBUF_DATA, req);
+ WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, reg);
+
+ xgpu_vi_mailbox_set_valid(adev, true);
+}
+
+static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev,
+ enum idh_event event)
+{
+ u32 reg;
+ u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID);
+
+ /* workaround: host driver doesn't set VALID for CMPL now */
+ if (event != IDH_FLR_NOTIFICATION_CMPL) {
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ if (!(reg & mask))
+ return -ENOENT;
+ }
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
+ if (reg != event)
+ return -ENOENT;
+
+ /* send ack to PF */
+ xgpu_vi_mailbox_send_ack(adev);
+
+ return 0;
+}
+
+static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
+{
+ int r = 0, timeout = VI_MAILBOX_TIMEDOUT;
+ u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, TRN_MSG_ACK);
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ while (!(reg & mask)) {
+ if (timeout <= 0) {
+ pr_err("Doesn't get ack from pf.\n");
+ r = -ETIME;
+ break;
+ }
+ mdelay(5);
+ timeout -= 5;
+
+ reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
+ }
+
+ return r;
+}
+
+static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+ int r = 0, timeout = VI_MAILBOX_TIMEDOUT;
+
+ r = xgpu_vi_mailbox_rcv_msg(adev, event);
+ while (r) {
+ if (timeout <= 0) {
+ pr_err("Doesn't get ack from pf.\n");
+ r = -ETIME;
+ break;
+ }
+ mdelay(5);
+ timeout -= 5;
+
+ r = xgpu_vi_mailbox_rcv_msg(adev, event);
+ }
+
+ return r;
+}
+
+static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request request)
+{
+ int r;
+
+ xgpu_vi_mailbox_trans_msg(adev, request);
+
+ /* start to poll ack */
+ r = xgpu_vi_poll_ack(adev);
+ if (r)
+ return r;
+
+ xgpu_vi_mailbox_set_valid(adev, false);
+
+ /* start to check msg if request is idh_req_gpu_init_access */
+ if (request == IDH_REQ_GPU_INIT_ACCESS ||
+ request == IDH_REQ_GPU_FINI_ACCESS ||
+ request == IDH_REQ_GPU_RESET_ACCESS) {
+ r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+ if (r) {
+ pr_err("Doesn't get ack from pf, give up\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int xgpu_vi_request_reset(struct amdgpu_device *adev)
+{
+ return xgpu_vi_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+}
+
+static int xgpu_vi_wait_reset_cmpl(struct amdgpu_device *adev)
+{
+ return xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL);
+}
+
+static int xgpu_vi_request_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+
+ req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+ return xgpu_vi_send_access_requests(adev, req);
+}
+
+static int xgpu_vi_release_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+ int r = 0;
+
+ req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+ r = xgpu_vi_send_access_requests(adev, req);
+
+ return r;
+}
+
+/* add support mailbox interrupts */
+static int xgpu_vi_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("get ack intr and do nothing.\n");
+ return 0;
+}
+
+static int xgpu_vi_set_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, ACK_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
+
+ return 0;
+}
+
+static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ /* wait until RCV_MSG become 3 */
+ if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
+ pr_err("failed to receive FLR_CMPL\n");
+ return;
+ }
+
+ /* Trigger recovery due to world switch failure */
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
+}
+
+static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, VALID_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp);
+
+ return 0;
+}
+
+static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int r;
+
+ /* trigger gpu-reset by hypervisor only if TDR disabled */
+ if (!amdgpu_gpu_recovery) {
+ /* see what event we get */
+ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+
+ /* only handle FLR_NOTIFY now */
+ if (!r && !amdgpu_in_reset(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_vi_mailbox_ack_irq_funcs = {
+ .set = xgpu_vi_set_mailbox_ack_irq,
+ .process = xgpu_vi_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_vi_mailbox_rcv_irq_funcs = {
+ .set = xgpu_vi_set_mailbox_rcv_irq,
+ .process = xgpu_vi_mailbox_rcv_irq,
+};
+
+void xgpu_vi_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->virt.ack_irq.num_types = 1;
+ adev->virt.ack_irq.funcs = &xgpu_vi_mailbox_ack_irq_funcs;
+ adev->virt.rcv_irq.num_types = 1;
+ adev->virt.rcv_irq.funcs = &xgpu_vi_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ return 0;
+}
+
+int xgpu_vi_mailbox_get_irq(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+ if (r)
+ return r;
+ r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ INIT_WORK(&adev->virt.flr_work, xgpu_vi_mailbox_flr_work);
+
+ return 0;
+}
+
+void xgpu_vi_mailbox_put_irq(struct amdgpu_device *adev)
+{
+ amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+const struct amdgpu_virt_ops xgpu_vi_virt_ops = {
+ .req_full_gpu = xgpu_vi_request_full_gpu_access,
+ .rel_full_gpu = xgpu_vi_release_full_gpu_access,
+ .reset_gpu = xgpu_vi_request_reset,
+ .wait_reset = xgpu_vi_wait_reset_cmpl,
+ .trans_msg = NULL, /* Does not need to trans VF errors to host. */
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
new file mode 100644
index 000000000..713ee66a4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __MXGPU_VI_H__
+#define __MXGPU_VI_H__
+
+#define VI_MAILBOX_TIMEDOUT 12000
+#define VI_MAILBOX_RESET_TIME 12
+
+/* VI mailbox messages request */
+enum idh_request {
+ IDH_REQ_GPU_INIT_ACCESS = 1,
+ IDH_REL_GPU_INIT_ACCESS,
+ IDH_REQ_GPU_FINI_ACCESS,
+ IDH_REL_GPU_FINI_ACCESS,
+ IDH_REQ_GPU_RESET_ACCESS,
+
+ IDH_LOG_VF_ERROR = 200,
+};
+
+/* VI mailbox messages data */
+enum idh_event {
+ IDH_CLR_MSG_BUF = 0,
+ IDH_READY_TO_ACCESS_GPU,
+ IDH_FLR_NOTIFICATION,
+ IDH_FLR_NOTIFICATION_CMPL,
+
+ IDH_TEXT_MESSAGE = 255
+};
+
+extern const struct amdgpu_virt_ops xgpu_vi_virt_ops;
+
+void xgpu_vi_init_golden_registers(struct amdgpu_device *adev);
+void xgpu_vi_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_vi_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_vi_mailbox_put_irq(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
new file mode 100644
index 000000000..b6a8478da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "navi10_ih.h"
+
+#define MAX_REARM_RETRY 10
+
+#define mmIH_CHICKEN_Sienna_Cichlid 0x018d
+#define mmIH_CHICKEN_Sienna_Cichlid_BASE_IDX 0
+
+static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * navi10_ih_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (NAVI10).
+ */
+static void navi10_ih_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+
+ if (adev->irq.ih2.ring_size) {
+ ih_regs = &adev->irq.ih2.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ if (adev->ip_versions[OSSSYS_HWIP][0] < IP_VERSION(5, 0, 3))
+ return;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
+ RB_USED_INT_THRESHOLD, threshold);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, mmIH_CNTL2, ih_cntl);
+}
+
+/**
+ * navi10_ih_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointet
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (NAVI10)
+ */
+static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * navi10_ih_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (NAVI10).
+ */
+static int navi10_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = navi10_ih_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 1 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t navi10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * navi10_ih_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (NAVI10)
+ */
+static int navi10_ih_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = navi10_ih_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, navi10_ih_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * navi10_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (NAVI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int navi10_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ u32 ih_chicken;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = navi10_ih_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
+ if (ih[0]->use_bus_addr) {
+ switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 1):
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid, ih_chicken);
+ break;
+ default:
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = navi10_ih_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0*/
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = navi10_ih_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * navi10_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (NAVI10).
+ */
+static void navi10_ih_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ navi10_ih_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * navi10_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (NAVI10). Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
+ /* Only ring0 supports writeback. On other rings fall back
+ * to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
+ */
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ }
+
+ ih_regs = &ih->ih_regs;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * navi10_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
+ *
+ */
+static void navi10_ih_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * navi10_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * @ih: IH ring buffer to set rptr
+ * Set the IH ring buffer rptr.
+ */
+static void navi10_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih_soft)
+ return;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ navi10_ih_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * navi10_ih_self_irq - dispatch work for ring 1 and 2
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int navi10_ih_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ switch (entry->ring_id) {
+ case 1:
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ case 2:
+ schedule_work(&adev->irq.ih2_work);
+ break;
+ default: break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs navi10_ih_self_irq_funcs = {
+ .process = navi10_ih_self_irq,
+};
+
+static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
+}
+
+static int navi10_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_set_interrupt_funcs(adev);
+ navi10_ih_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int navi10_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ if ((adev->flags & AMD_IS_APU) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ use_bus_addr = false;
+ else
+ use_bus_addr = true;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ adev->irq.ih1.ring_size = 0;
+ adev->irq.ih2.ring_size = 0;
+
+ /* initialize ih control registers offset */
+ navi10_ih_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int navi10_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int navi10_ih_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return navi10_ih_irq_init(adev);
+}
+
+static int navi10_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int navi10_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return navi10_ih_hw_fini(adev);
+}
+
+static int navi10_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return navi10_ih_hw_init(adev);
+}
+
+static bool navi10_ih_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int navi10_ih_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int navi10_ih_soft_reset(void *handle)
+{
+ /* todo */
+ return 0;
+}
+
+static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int navi10_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static int navi10_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void navi10_ih_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs navi10_ih_ip_funcs = {
+ .name = "navi10_ih",
+ .early_init = navi10_ih_early_init,
+ .late_init = NULL,
+ .sw_init = navi10_ih_sw_init,
+ .sw_fini = navi10_ih_sw_fini,
+ .hw_init = navi10_ih_hw_init,
+ .hw_fini = navi10_ih_hw_fini,
+ .suspend = navi10_ih_suspend,
+ .resume = navi10_ih_resume,
+ .is_idle = navi10_ih_is_idle,
+ .wait_for_idle = navi10_ih_wait_for_idle,
+ .soft_reset = navi10_ih_soft_reset,
+ .set_clockgating_state = navi10_ih_set_clockgating_state,
+ .set_powergating_state = navi10_ih_set_powergating_state,
+ .get_clockgating_state = navi10_ih_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs navi10_ih_funcs = {
+ .get_wptr = navi10_ih_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = navi10_ih_set_rptr
+};
+
+static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ if (adev->irq.ih_funcs == NULL)
+ adev->irq.ih_funcs = &navi10_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version navi10_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &navi10_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.h b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h
new file mode 100644
index 000000000..140fbdaae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NAVI10_IH_H__
+#define __NAVI10_IH_H__
+
+extern const struct amdgpu_ip_block_version navi10_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
new file mode 100644
index 000000000..a5b60c9a2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -0,0 +1,4822 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NAVI10_SDMA_PKT_OPEN_H_
+#define __NAVI10_SDMA_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_BC 16
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+
+/*define for op field*/
+#define SDMA_PKT_HEADER_op_offset 0
+#define SDMA_PKT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_op_shift 0
+#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_HEADER_sub_op_offset 0
+#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_sub_op_shift 8
+#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for backwards field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for all field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cc field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift 20
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CC(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 11
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for dcc_dir field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) ((x & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18
+#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word*/
+/*define for mask_first_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word*/
+/*define for mask_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word*/
+/*define for mask_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word*/
+/*define for value_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word*/
+/*define for value_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for priv field*/
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for apertureid field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/*define for memlog_clr field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/*define for BYTE_STRIDE word*/
+/*define for byte_stride field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/*define for DMA_COUNT word*/
+/*define for dma_count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for BYTE_COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR word*/
+/*define for addr_31_2 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for ea field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/*define for START_PAGE word*/
+/*define for addr_31_4 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/*define for PAGE_NUM word*/
+/*define for page_num_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/*define for mode field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/*define for PATTERN word*/
+/*define for pattern field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/*define for CMP0_ADDR_START_LO word*/
+/*define for cmp0_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/*define for CMP0_ADDR_START_HI word*/
+/*define for cmp0_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/*define for CMP0_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP0_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for CMP1_ADDR_START_LO word*/
+/*define for cmp1_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/*define for CMP1_ADDR_START_HI word*/
+/*define for cmp1_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/*define for CMP1_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP1_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for REC_ADDR_LO word*/
+/*define for rec_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/*define for REC_ADDR_HI word*/
+/*define for rec_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/*define for RESERVED word*/
+/*define for reserved field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for per_vmid_inv_req field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/*define for flush_type field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/*define for l2_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/*define for l2_pde0 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/*define for l2_pde1 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/*define for l2_pde2 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/*define for l1_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/*define for clr_protection_fault_status_addr field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/*define for log_request field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/*define for four_kilobytes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for s field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/*define for page_va_42_12 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for page_va_47_43 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for base_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for base_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift)
+
+/*define for gcr_control_15_0 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for gcr_control_18_16 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift)
+
+/*define for limit_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift)
+
+/*define for PAYLOAD4 word*/
+/*define for limit_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __NAVI10_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
new file mode 100644
index 000000000..ef368ca79
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v2_3.h"
+
+#include "nbio/nbio_2_3_default.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
+#define smnPCIE_CONFIG_CNTL 0x11180044
+#define smnCPM_CONTROL 0x11180460
+#define smnPCIE_CNTL2 0x11180070
+#define smnPCIE_LC_CNTL 0x11140280
+#define smnPCIE_LC_CNTL3 0x111402d4
+#define smnPCIE_LC_CNTL6 0x111402ec
+#define smnPCIE_LC_CNTL7 0x111402f0
+#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c
+#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123538
+#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324
+#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+
+#define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6
+#define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2
+#define mmBIF_SDMA3_DOORBELL_RANGE 0x01d7
+#define mmBIF_SDMA3_DOORBELL_RANGE_BASE_IDX 2
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
+
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288
+
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L
+
+static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /*
+ * guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+ * therefore we force rev_id to 0 (which is the default value)
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ return 0;
+ }
+
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v2_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+ instance == 1 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) :
+ instance == 2 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, SIZE,
+ doorbell_size);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, SIZE,
+ 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = instance ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v2_3_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN,
+ enable ? 1 : 0);
+}
+
+static void nbio_v2_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+static void nbio_v2_3_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+static void nbio_v2_3_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_PCIE(smnCPM_CONTROL);
+ if (enable) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnCPM_CONTROL, data);
+}
+
+static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v2_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v2_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v2_3_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v2_3_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+}
+
+#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1
+#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x0000000A // 1=1us, 9=1ms, 10=4ms
+#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms
+
+static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+
+ if (enable) {
+ /* Disable ASPM L0s/L1 first */
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+
+ data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+
+ if (dev_is_removable(&adev->pdev->dev))
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ else
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ } else {
+ /* Disbale ASPM L1 */
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ /* Disable ASPM TxL0s */
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ /* Disable ACPI L1 */
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+}
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v2_3_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP2, data);
+
+ def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK |
+ PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v2_3_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ if (dev_is_removable(&adev->pdev->dev))
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ else
+ data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t link_width = 0;
+
+ if (!((adev->asic_type >= CHIP_NAVI10) &&
+ (adev->asic_type <= CHIP_NAVI12)))
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+ >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+
+ /*
+ * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data)
+ * if link_width is 0x3 (x4)
+ */
+ if (0x3 == link_width) {
+ reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK;
+ reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT);
+ WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data);
+ }
+}
+
+static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->asic_type != CHIP_NAVI10)
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK;
+ WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
+}
+
+static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
+{
+ uint32_t reg, reg_data;
+
+ if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0))
+ return;
+
+ reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
+
+ /* Clear Interrupt Status
+ */
+ if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) {
+ reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (reg & BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) {
+ reg_data = 1 << BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT;
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, reg_data);
+ }
+ }
+}
+
+const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
+ .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v2_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v2_3_get_rev_id,
+ .mc_access_enable = nbio_v2_3_mc_access_enable,
+ .get_memsize = nbio_v2_3_get_memsize,
+ .sdma_doorbell_range = nbio_v2_3_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v2_3_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v2_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v2_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v2_3_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v2_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v2_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v2_3_get_clockgating_state,
+ .ih_control = nbio_v2_3_ih_control,
+ .init_registers = nbio_v2_3_init_registers,
+ .remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
+ .enable_aspm = nbio_v2_3_enable_aspm,
+ .program_aspm = nbio_v2_3_program_aspm,
+ .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
+ .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
+ .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
new file mode 100644
index 000000000..a43b60acf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V2_3_H__
+#define __NBIO_V2_3_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
new file mode 100644
index 000000000..e5b5b0f49
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v4_3.h"
+
+#include "nbio/nbio_4_3_0_offset.h"
+#include "nbio/nbio_4_3_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v4_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v4_3_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v4_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v4_3_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v4_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ if (instance == 0) {
+ u32 doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWID,
+ 0xe);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+ 0x3);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
+}
+
+static void nbio_v4_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range;
+
+ if (instance)
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL);
+ else
+ doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWID,
+ instance ? 0x7 : 0x4);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 8);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+ instance ? 0x7 : 0x4);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 0);
+
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+}
+
+static void nbio_v4_3_gc_doorbell_init(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+}
+
+static void nbio_v4_3_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v4_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+static void nbio_v4_3_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE,
+ 0x1);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID,
+ 0x0);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x0);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+}
+
+static void nbio_v4_3_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (enable) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regCPM_CONTROL, data);
+}
+
+static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ /* TODO: need update in future */
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (enable) {
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ } else {
+ data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_CNTL2, data);
+}
+
+static void nbio_v4_3_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v4_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v4_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v4_3_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbio_v4_3_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
+{
+ if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(4, 3, 0)) {
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+ }
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+}
+
+static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
+{
+ u32 data, rom_offset;
+
+ data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+ rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+
+ return rom_offset;
+}
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data = 0x35EB;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ if (adev->pdev->ltr_path)
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ else
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) &&
+ !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0)))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
+ data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+ data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+ nbio_v4_3_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
+const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+ .program_aspm = nbio_v4_3_program_aspm,
+};
+
+
+static void nbio_v4_3_sriov_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+}
+
+static void nbio_v4_3_sriov_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+}
+
+static void nbio_v4_3_sriov_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+}
+
+static void nbio_v4_3_sriov_gc_doorbell_init(struct amdgpu_device *adev)
+{
+}
+
+const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = {
+ .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v4_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v4_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v4_3_get_rev_id,
+ .mc_access_enable = nbio_v4_3_mc_access_enable,
+ .get_memsize = nbio_v4_3_get_memsize,
+ .sdma_doorbell_range = nbio_v4_3_sriov_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v4_3_sriov_vcn_doorbell_range,
+ .gc_doorbell_init = nbio_v4_3_sriov_gc_doorbell_init,
+ .enable_doorbell_aperture = nbio_v4_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v4_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v4_3_sriov_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v4_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v4_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v4_3_get_clockgating_state,
+ .ih_control = nbio_v4_3_ih_control,
+ .init_registers = nbio_v4_3_init_registers,
+ .remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
+ .get_rom_offset = nbio_v4_3_get_rom_offset,
+};
+
+static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+
+static int nbio_v4_3_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to bif ring. since bif ring is not enabled, just leave process callback
+ * as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v4_3_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v4_3_set_ras_err_event_athub_irq_state,
+ .process = nbio_v4_3_process_err_event_athub_irq,
+};
+
+static void nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbio_v4_3_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v4_3_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbio v4_3 uses the same irq source as nbio v7_4 */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+struct amdgpu_nbio_ras nbio_v4_3_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v4_3_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt = nbio_v4_3_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
new file mode 100644
index 000000000..399037cdf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V4_3_H__
+#define __NBIO_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_funcs;
+extern const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs;
+extern struct amdgpu_nbio_ras nbio_v4_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
new file mode 100644
index 000000000..37615a772
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v6_1.h"
+
+#include "nbio/nbio_6_1_default.h"
+#include "nbio/nbio_6_1_offset.h"
+#include "nbio/nbio_6_1_sh_mask.h"
+#include "nbio/nbio_6_1_smn.h"
+#include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define smnPCIE_LC_CNTL 0x11140280
+#define smnPCIE_LC_CNTL3 0x111402d4
+#define smnPCIE_LC_CNTL6 0x111402ec
+#define smnPCIE_LC_CNTL7 0x111402f0
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
+#define NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK 0x00001000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L
+#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123530
+#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c
+#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324
+#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4
+#define smnRCC_BIF_STRAP2 0x10123488
+#define smnRCC_BIF_STRAP3 0x1012348c
+#define smnRCC_BIF_STRAP5 0x10123494
+#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0
+
+static void nbio_v6_1_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+
+}
+
+static void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+
+static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, SIZE, 6);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+static void nbio_v6_1_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnCPM_CONTROL);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PERM_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PERM_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnCPM_CONTROL, data);
+}
+
+static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v6_1_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK
+};
+
+static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
+ data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CI_CNTL, data);
+
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+}
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP2, data);
+
+ def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK |
+ PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v6_1_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
+ .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset,
+ .get_rev_id = nbio_v6_1_get_rev_id,
+ .mc_access_enable = nbio_v6_1_mc_access_enable,
+ .get_memsize = nbio_v6_1_get_memsize,
+ .sdma_doorbell_range = nbio_v6_1_sdma_doorbell_range,
+ .enable_doorbell_aperture = nbio_v6_1_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v6_1_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v6_1_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v6_1_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v6_1_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v6_1_get_clockgating_state,
+ .ih_control = nbio_v6_1_ih_control,
+ .init_registers = nbio_v6_1_init_registers,
+ .remap_hdp_registers = nbio_v6_1_remap_hdp_registers,
+ .program_aspm = nbio_v6_1_program_aspm,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
new file mode 100644
index 000000000..6dc743b73
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V6_1_H__
+#define __NBIO_V6_1_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
new file mode 100644
index 000000000..aa0326d00
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_0.h"
+
+#include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
+#include "nbio/nbio_7_0_sh_mask.h"
+#include "nbio/nbio_7_0_smn.h"
+#include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
+
+static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+
+}
+
+static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+static uint32_t nbio_7_0_read_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset)
+{
+ uint32_t data;
+
+ WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
+ data = RREG32_SOC15(NBIO, 0, mmSYSHUB_DATA);
+
+ return data;
+}
+
+static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset,
+ uint32_t data)
+{
+ WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
+ WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data);
+}
+
+static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ /* NBIF_MGCG_CTRL_LCLK */
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;
+ else
+ data &= ~NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;
+
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ /* SYSHUB_MGCG_CTRL_SOCCLK */
+ def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;
+ else
+ data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;
+
+ if (def != data)
+ nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK, data);
+
+ /* SYSHUB_MGCG_CTRL_SHUBCLK */
+ def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;
+ else
+ data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;
+
+ if (def != data)
+ nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data);
+}
+
+static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static void nbio_v7_0_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_0_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_0_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_0_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v7_0_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
+ .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset,
+ .get_rev_id = nbio_v7_0_get_rev_id,
+ .mc_access_enable = nbio_v7_0_mc_access_enable,
+ .get_memsize = nbio_v7_0_get_memsize,
+ .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_0_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_0_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_0_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_0_get_clockgating_state,
+ .ih_control = nbio_v7_0_ih_control,
+ .init_registers = nbio_v7_0_init_registers,
+ .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
new file mode 100644
index 000000000..e7aefb252
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_0_H__
+#define __NBIO_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
new file mode 100644
index 000000000..4ef1fa460
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_2.h"
+
+#include "nbio/nbio_7_2_0_offset.h"
+#include "nbio/nbio_7_2_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC 0x0015
+#define regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC_BASE_IDX 2
+#define regBIF_BX0_BIF_FB_EN_YC 0x0100
+#define regBIF_BX0_BIF_FB_EN_YC_BASE_IDX 2
+#define regBIF1_PCIE_MST_CTRL_3 0x4601c6
+#define regBIF1_PCIE_MST_CTRL_3_BASE_IDX 5
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT \
+ 0x1b
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT \
+ 0x1c
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK \
+ 0x08000000L
+#define BIF1_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK \
+ 0x30000000L
+#define regBIF1_PCIE_TX_POWER_CTRL_1 0x460187
+#define regBIF1_PCIE_TX_POWER_CTRL_1_BASE_IDX 5
+#define BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L
+#define BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L
+
+static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC);
+ break;
+ default:
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ break;
+ }
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0);
+ break;
+ default:
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+ break;
+ }
+}
+
+static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_2_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_SDMA0_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_SDMA0_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_SDMA0_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_2_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_2_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+static void nbio_v7_2_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+static void nbio_v7_2_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE));
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE),
+ ih_doorbell_range);
+}
+
+static void nbio_v7_2_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v7_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL), data);
+}
+
+static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
+
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0,
+ regBIF1_PCIE_TX_POWER_CTRL_1));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= (BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ else
+ data &= ~(BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1),
+ data);
+ break;
+ default:
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ else
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data);
+ break;
+ }
+}
+
+static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL));
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2));
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v7_2_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_2_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_2_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_2_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+static u32 nbio_v7_2_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbio_v7_2_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3));
+ data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data);
+ break;
+ default:
+ def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL));
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data);
+ break;
+ }
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 1):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
+ data &= ~RCC_DEV2_EPF0_STRAP2__STRAP_NO_SOFT_RESET_DEV2_F0_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
+ break;
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_2_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_2_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_2_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_2_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_2_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_2_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_2_get_rev_id,
+ .mc_access_enable = nbio_v7_2_mc_access_enable,
+ .get_memsize = nbio_v7_2_get_memsize,
+ .sdma_doorbell_range = nbio_v7_2_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_2_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_2_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_2_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_2_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_2_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_2_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_2_get_clockgating_state,
+ .ih_control = nbio_v7_2_ih_control,
+ .init_registers = nbio_v7_2_init_registers,
+ .remap_hdp_registers = nbio_v7_2_remap_hdp_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h
new file mode 100644
index 000000000..a8e8e6564
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_2_H__
+#define __NBIO_V7_2_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
new file mode 100644
index 000000000..685abf57f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -0,0 +1,812 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_4.h"
+#include "amdgpu_ras.h"
+
+#include "nbio/nbio_7_4_offset.h"
+#include "nbio/nbio_7_4_sh_mask.h"
+#include "nbio/nbio_7_4_0_smn.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define smnPCIE_LC_CNTL 0x11140280
+#define smnPCIE_LC_CNTL3 0x111402d4
+#define smnPCIE_LC_CNTL6 0x111402ec
+#define smnPCIE_LC_CNTL7 0x111402f0
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+#define smnRCC_BIF_STRAP3 0x1012348c
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK 0x0000FFFFL
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK 0xFFFF0000L
+#define smnRCC_BIF_STRAP5 0x10123494
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK 0x0000FFFFL
+#define smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2 0x1014008c
+#define BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK 0x0400L
+#define smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP 0x10140324
+#define smnPSWUSP0_PCIE_LC_CNTL2 0x111402c4
+#define smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL 0x10123538
+#define smnRCC_BIF_STRAP2 0x10123488
+#define RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK 0x00004000L
+#define RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT 0x0
+#define RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT 0x10
+#define RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT 0x0
+
+/*
+ * These are nbio v7_4_1 registers mask. Temporarily define these here since
+ * nbio v7_4_1 header is incomplete.
+ */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK 0x00001000L /* Don't use. Firmware uses this bit internally */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK 0x00002000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK 0x00004000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK 0x00008000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK 0x00010000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK 0x00020000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK 0x00040000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK 0x00080000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK 0x00100000L
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01dc
+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
+//BIF_MMSCH1_DOORBELL_RANGE
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT 0x2
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT 0x10
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE_ALDE 0x01d8
+#define mmBIF_MMSCH1_DOORBELL_RANGE_ALDE_BASE_IDX 2
+//BIF_MMSCH1_DOORBELL_ALDE_RANGE
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__OFFSET__SHIFT 0x2
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__SIZE__SHIFT 0x10
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__OFFSET_MASK 0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE_ALDE__SIZE_MASK 0x001F0000L
+
+#define mmRCC_DEV0_EPF0_STRAP0_ALDE 0x0015
+#define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX 2
+
+#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x00fe
+#define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 2
+#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT 0x18
+#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK 0x01000000L
+
+#define mmBIF_INTR_CNTL_ALDE 0x0101
+#define mmBIF_INTR_CNTL_ALDE_BASE_IDX 2
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
+static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_ALDE);
+ else
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_4_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 reg, doorbell_range;
+
+ if (instance < 2) {
+ reg = instance +
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
+ } else {
+ /*
+ * These registers address of SDMA2~7 is not consecutive
+ * from SDMA0~1. Need plus 4 dwords offset.
+ *
+ * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0
+ * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4
+ * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8
++ * BIF_SDMA4_DOORBELL_RANGE:
++ * ARCTURUS: 0x3be0
++ * ALDEBARAN: 0x3be4
+ */
+ if (adev->asic_type == CHIP_ALDEBARAN && instance == 4)
+ reg = instance + 0x4 + 0x1 +
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_SDMA0_DOORBELL_RANGE);
+ else
+ reg = instance + 0x4 +
+ SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_SDMA0_DOORBELL_RANGE);
+ }
+
+ doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, doorbell_size);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg;
+ u32 doorbell_range;
+
+ if (instance) {
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE_ALDE);
+ else
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE);
+ } else
+ reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 8);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+
+static void nbio_v7_4_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO: Add support for v7.4
+}
+
+static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static void nbio_v7_4_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_4_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_4_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_4_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
+ .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t baco_cntl;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+
+ if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4) &&
+ !amdgpu_sriov_vf(adev)) {
+ baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL);
+ if (baco_cntl &
+ (BACO_CNTL__BACO_DUMMY_EN_MASK | BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev, "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, 0, mmBACO_CNTL, baco_cntl);
+ }
+ }
+}
+
+static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
+ else
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ if (!ras->disable_ras_err_cnt_harvest) {
+ /*
+ * clear error status after ras_controller_intr
+ * according to hw team and count ue number
+ * for query
+ */
+ nbio_v7_4_query_ras_error_count(adev, &err_data);
+
+ /* logging on error cnt and printing for awareness */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block, "
+ "no user action is needed.\n",
+ obj->err_data.ce_count,
+ get_ras_block_str(adev->nbio.ras_if));
+
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ get_ras_block_str(adev->nbio.ras_if));
+ }
+
+ dev_info(adev->dev, "RAS controller interrupt triggered "
+ "by NBIF error\n");
+
+ /* ras_controller_int is dedicated for nbif ras error,
+ * not the global interrupt for sync flood
+ */
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
+
+static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
+ else
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+
+static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+ else
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+ else
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = {
+ .set = nbio_v7_4_set_ras_controller_irq_state,
+ .process = nbio_v7_4_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_4_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_4_process_err_event_athub_irq,
+};
+
+static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_4_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_4_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE 0x13b20030
+#define smnRAS_GLOBAL_STATUS_LO_ALDE 0x13b20020
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ uint32_t global_sts, central_sts, int_eoi, parity_sts;
+ uint32_t corr, fatal, non_fatal;
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO_ALDE);
+ else
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+
+ corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr);
+ fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
+ non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
+ ParityErrNonFatal);
+
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE);
+ else
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
+
+ if (corr)
+ err_data->ce_count++;
+ if (fatal)
+ err_data->ue_count++;
+
+ if (corr || fatal || non_fatal) {
+ central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS);
+
+ /* clear error status register */
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO_ALDE, global_sts);
+ else
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+
+ if (fatal)
+ {
+ /* clear parity fatal error indication field */
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2_ALDE, parity_sts);
+ else
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2, parity_sts);
+ }
+
+ if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
+ BIFL_RasContller_Intr_Recv)) {
+ /* clear interrupt status register */
+ WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts);
+ int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI);
+ int_eoi = REG_SET_FIELD(int_eoi,
+ IOHC_INTERRUPT_EOI, SMI_EOI, 1);
+ WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi);
+ }
+ }
+}
+
+static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL_ALDE,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+ else
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+}
+
+const struct amdgpu_ras_block_hw_ops nbio_v7_4_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_4_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_4_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_4_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
+ .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
+};
+
+
+#ifdef CONFIG_PCIEASPM
+static void nbio_v7_4_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, 0x75EB);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP2, data);
+
+ def = data = RREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
+ return;
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
+ data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_REG_DIS_LCLK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK |
+ PCIE_LC_CNTL6__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL6, data);
+
+ /* Don't bother about LTR if LTR is not enabled
+ * in the path */
+ if (adev->pdev->ltr_path)
+ nbio_v7_4_program_ltr(adev);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP3, data);
+
+ def = data = RREG32_PCIE(smnRCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnRCC_BIF_STRAP5, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL, data);
+
+ def = data = RREG32_PCIE(smnPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_PCIE(smnPCIE_LC_CNTL3, data);
+#endif
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_4_get_pcie_data_offset,
+ .get_rev_id = nbio_v7_4_get_rev_id,
+ .mc_access_enable = nbio_v7_4_mc_access_enable,
+ .get_memsize = nbio_v7_4_get_memsize,
+ .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt,
+ .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_4_get_clockgating_state,
+ .ih_control = nbio_v7_4_ih_control,
+ .init_registers = nbio_v7_4_init_registers,
+ .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
+ .program_aspm = nbio_v7_4_program_aspm,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
new file mode 100644
index 000000000..f27c41728
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_4_H__
+#define __NBIO_V7_4_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_4_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
new file mode 100644
index 000000000..def89379b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_7.h"
+
+#include "nbio/nbio_7_7_0_offset.h"
+#include "nbio/nbio_7_7_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_7_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN,
+ BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_7_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_SDMA0_DOORBELL_RANGE,
+ SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_7_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
+static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 reg;
+
+ reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+ reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg);
+}
+
+static void nbio_v7_7_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0,
+ regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+static void nbio_v7_7_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,
+ regGDC0_BIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE,
+ ih_doorbell_range);
+}
+
+static void nbio_v7_7_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+ adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_7_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_7_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_7_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_7_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+static u32 nbio_v7_7_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbio_v7_7_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, BIF0_PCIE_MST_CTRL_3,
+ CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
+
+}
+
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data);
+}
+
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_7_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_7_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_7_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_7_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_7_get_rev_id,
+ .mc_access_enable = nbio_v7_7_mc_access_enable,
+ .get_memsize = nbio_v7_7_get_memsize,
+ .sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_7_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
+ .ih_control = nbio_v7_7_ih_control,
+ .init_registers = nbio_v7_7_init_registers,
+ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
new file mode 100644
index 000000000..2a33b256b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_7_H__
+#define __NBIO_V7_7_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_7_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_7_funcs;
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_7_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
new file mode 100644
index 000000000..ae45656eb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_9.h"
+#include "amdgpu_ras.h"
+
+#include "nbio/nbio_7_9_0_offset.h"
+#include "nbio/nbio_7_9_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define NPS_MODE_MASK 0x000000FFL
+
+/* Core 0 Port 0 counter */
+#define smnPCIEP_NAK_COUNTER 0x1A340218
+
+#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c
+#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888
+#define smnPCIE_PERF_COUNT_CNTL 0x1A380200
+#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220
+#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C
+#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8
+#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918
+
+
+static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, STRAP_ATI_REV_ID_DEV0_F0);
+
+ return tmp;
+}
+
+static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbio_v7_9_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ int aid_id, dev_inst;
+
+ dev_inst = GET_INST(SDMA0, instance);
+ aid_id = adev->sdma.instance[instance].aid_id;
+
+ if (use_doorbell == false)
+ return;
+
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY, doorbell_index);
+ doorbell_range =
+ REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, doorbell_size);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl =
+ REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, doorbell_size);
+
+ switch (dev_inst % adev->sdma.num_inst_per_aid) {
+ case 0:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xe);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x1);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_1_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 1:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x8);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x2);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_2_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 2:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x8);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_5_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ case 3:
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4,
+ 4 * aid_id, doorbell_range);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0xa);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x9);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_6_CTRL,
+ aid_id, doorbell_ctrl);
+ break;
+ default:
+ break;
+ }
+
+ return;
+}
+
+static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 doorbell_range = 0, doorbell_ctrl = 0;
+ u32 aid_id = instance;
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ 0x9);
+ if (aid_id)
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ DOORBELL0_FENCE_ENABLE_ENTRY,
+ 0x4);
+
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+
+ WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17,
+ aid_id, doorbell_range);
+ WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL,
+ aid_id, doorbell_ctrl);
+ }
+}
+
+static void nbio_v7_9_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ /* Enable to allow doorbell pass thru on pre-silicon bare-metal */
+ WREG32_SOC15(NBIO, 0, regBIFC_DOORBELL_ACCESS_EN_PF, 0xfffff);
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v7_9_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = 0, ih_doorbell_ctrl = 0;
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY,
+ 0x8);
+
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE, 1);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0);
+ } else {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ DOORBELL0_CTRL_ENTRY_0,
+ BIF_DOORBELL0_RANGE_SIZE_ENTRY, 0);
+ ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl,
+ S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE, 0);
+ }
+
+ WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_0, ih_doorbell_range);
+ WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_3_CTRL, ih_doorbell_ctrl);
+}
+
+
+static void nbio_v7_9_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void nbio_v7_9_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void nbio_v7_9_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+static void nbio_v7_9_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl =
+ REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_9_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_9_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_9_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2);
+}
+
+static u32 nbio_v7_9_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2);
+}
+
+static u32 nbio_v7_9_get_pcie_index_hi_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2_HI);
+}
+
+const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+ .ref_and_mask_sdma2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK,
+ .ref_and_mask_sdma3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
+ .ref_and_mask_sdma4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
+ .ref_and_mask_sdma5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
+ .ref_and_mask_sdma6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
+ .ref_and_mask_sdma7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
+};
+
+static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
+}
+
+static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
+{
+ u32 tmp, px;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS);
+ px = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_STATUS,
+ PARTITION_MODE);
+
+ return px;
+}
+
+static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
+ u32 *supp_modes)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE);
+
+ if (supp_modes) {
+ *supp_modes =
+ RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP);
+ }
+
+ return ffs(tmp);
+}
+
+static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
+{
+ u32 inst_mask;
+ int i;
+
+ WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
+ 0xff & ~(adev->gfx.xcc_mask));
+
+ WREG32_SOC15(NBIO, 0, regBIFC_GFX_INT_MONITOR_MASK, 0x7ff);
+
+ inst_mask = adev->aid_mask & ~1U;
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15_EXT(NBIO, i, regXCC_DOORBELL_FENCE, i,
+ XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
+
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 baco_cntl;
+ for_each_inst(i, adev->aid_mask) {
+ baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+ if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(
+ BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev,
+ "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+ baco_cntl);
+ }
+ }
+ }
+}
+
+static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ u32 val, nak_r, nak_g;
+
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ /* Get the number of NAKs received and generated */
+ val = RREG32_PCIE(smnPCIEP_NAK_COUNTER);
+ nak_r = val & 0xFFFF;
+ nak_g = val >> 16;
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
+static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctrrx = 0;
+ uint32_t perfctrtx = 0;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Use TXCLK3 counter group for rx event */
+ /* Use TXCLK7 counter group for tx event */
+ /* Set the 2 events that we wish to watch, defined above */
+ /* 40 is event# for received msgs */
+ /* 2 is event# of posted requests sent */
+ perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40);
+ perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx);
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx);
+
+ /* Zero out and enable SHADOW_WR
+ * Write 0x6:
+ * Bit 1 = Global Shadow wr(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);
+
+ /* Enable Gloabl Counter
+ * Write 0x1:
+ * Bit 0 = Global Counter Enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001);
+
+ msleep(1000);
+
+ /* Disable Global Counter, Reset and enable SHADOW_WR
+ * Write 0x6:
+ * Bit 1 = Global Shadow wr(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);
+
+ /* Get the upper and lower count */
+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) |
+ ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32);
+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) |
+ ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32);
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_9_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_9_get_pcie_data_offset,
+ .get_pcie_index_hi_offset = nbio_v7_9_get_pcie_index_hi_offset,
+ .get_rev_id = nbio_v7_9_get_rev_id,
+ .mc_access_enable = nbio_v7_9_mc_access_enable,
+ .get_memsize = nbio_v7_9_get_memsize,
+ .sdma_doorbell_range = nbio_v7_9_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_9_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_9_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_9_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_9_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_9_enable_doorbell_interrupt,
+ .update_medium_grain_clock_gating = nbio_v7_9_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_9_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_9_get_clockgating_state,
+ .ih_control = nbio_v7_9_ih_control,
+ .remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
+ .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
+ .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .init_registers = nbio_v7_9_init_registers,
+ .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count,
+ .get_pcie_usage = nbio_v7_9_get_pcie_usage,
+};
+
+static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ return;
+}
+
+static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ if (!ras->disable_ras_err_cnt_harvest) {
+ /*
+ * clear error status after ras_controller_intr
+ * according to hw team and count ue number
+ * for query
+ */
+ nbio_v7_9_query_ras_error_count(adev, &err_data);
+
+ /* logging on error cnt and printing for awareness */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+
+ if (err_data.ce_count)
+ dev_info(adev->dev, "%ld correctable hardware "
+ "errors detected in %s block, "
+ "no user action is needed.\n",
+ obj->err_data.ce_count,
+ get_ras_block_str(adev->nbio.ras_if));
+
+ if (err_data.ue_count)
+ dev_info(adev->dev, "%ld uncorrectable hardware "
+ "errors detected in %s block\n",
+ obj->err_data.ue_count,
+ get_ras_block_str(adev->nbio.ras_if));
+ }
+
+ dev_info(adev->dev, "RAS controller interrupt triggered "
+ "by NBIF error\n");
+ }
+}
+
+static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* Dummy function, there is no initialization operation in driver */
+
+ return 0;
+}
+
+static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = {
+ .set = nbio_v7_9_set_ras_controller_irq_state,
+ .process = nbio_v7_9_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_9_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_9_process_err_event_athub_irq,
+};
+
+static int nbio_v7_9_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_9_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_9_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_9_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+const struct amdgpu_ras_block_hw_ops nbio_v7_9_ras_hw_ops = {
+ .query_ras_error_count = nbio_v7_9_query_ras_error_count,
+};
+
+struct amdgpu_nbio_ras nbio_v7_9_ras = {
+ .ras_block = {
+ .ras_comm = {
+ .name = "pcie_bif",
+ .block = AMDGPU_RAS_BLOCK__PCIE_BIF,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ },
+ .hw_ops = &nbio_v7_9_ras_hw_ops,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
+ },
+ .handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_9_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_9_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
new file mode 100644
index 000000000..737097719
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_9_H__
+#define __NBIO_V7_9_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_9_funcs;
+extern struct amdgpu_nbio_ras nbio_v7_9_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
new file mode 100644
index 000000000..13aca808e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -0,0 +1,1138 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "mp/mp_11_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "gmc_v10_0.h"
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
+#include "nbio_v7_2.h"
+#include "hdp_v5_0.h"
+#include "nv.h"
+#include "navi10_ih.h"
+#include "gfx_v10_0.h"
+#include "sdma_v5_0.h"
+#include "sdma_v5_2.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v3_0.h"
+#include "jpeg_v3_0.h"
+#include "amdgpu_vkms.h"
+#include "mes_v10_1.h"
+#include "mxgpu_nv.h"
+#include "smuio_v11_0.h"
+#include "smuio_v11_0_6.h"
+
+static const struct amd_ip_funcs nv_common_ip_funcs;
+
+/* Navi */
+static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static const struct amdgpu_video_codecs nv_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(nv_video_codecs_encode_array),
+ .codec_array = nv_video_codecs_encode_array,
+};
+
+/* Navi1x */
+static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs nv_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(nv_video_codecs_decode_array),
+ .codec_array = nv_video_codecs_decode_array,
+};
+
+/* Sienna Cichlid */
+static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_encode_array),
+ .codec_array = sc_video_codecs_encode_array,
+};
+
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn0),
+ .codec_array = sc_video_codecs_decode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn1),
+ .codec_array = sc_video_codecs_decode_array_vcn1,
+};
+
+/* SRIOV Sienna Cichlid, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ .codec_array = sriov_sc_video_codecs_encode_array,
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_sc_video_codecs_decode_array_vcn1,
+};
+
+/* Beige Goby*/
+static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array),
+ .codec_array = bg_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+/* Yellow Carp*/
+static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array),
+ .codec_array = yc_video_codecs_decode_array,
+};
+
+static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_sc_video_codecs_encode;
+ else
+ *codecs = &sriov_sc_video_codecs_decode_vcn0;
+ }
+ } else {
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn0;
+ }
+ }
+ return 0;
+ case IP_VERSION(3, 0, 16):
+ case IP_VERSION(3, 0, 2):
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &sc_video_codecs_decode_vcn0;
+ return 0;
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ if (encode)
+ *codecs = &sc_video_codecs_encode;
+ else
+ *codecs = &yc_video_codecs_decode;
+ return 0;
+ case IP_VERSION(3, 0, 33):
+ if (encode)
+ *codecs = &bg_video_codecs_encode;
+ else
+ *codecs = &bg_video_codecs_decode;
+ return 0;
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ if (encode)
+ *codecs = &nv_video_codecs_encode;
+ else
+ *codecs = &nv_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 nv_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 nv_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+void nv_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static bool nv_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+};
+
+static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t nv_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return nv_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {
+ en = &nv_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = nv_get_register_value(adev,
+ nv_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int nv_asic_mode2_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ ret = amdgpu_dpm_mode2_reset(adev);
+ if (ret)
+ dev_err(adev->dev, "GPU mode2 reset failed\n");
+
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+
+static enum amd_reset_method
+nv_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
+ amdgpu_reset_method == AMD_RESET_METHOD_PCI)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ return AMD_RESET_METHOD_MODE2;
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ return AMD_RESET_METHOD_MODE1;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int nv_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (nv_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = nv_asic_mode2_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static int nv_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+static void nv_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
+
+}
+
+const struct amdgpu_ip_block_version nv_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &nv_common_ip_funcs,
+};
+
+void nv_set_virt_ops(struct amdgpu_device *adev)
+{
+ adev->virt.ops = &xgpu_nv_virt_ops;
+}
+
+static bool nv_need_full_reset(struct amdgpu_device *adev)
+{
+ return true;
+}
+
+static bool nv_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static void nv_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;
+ adev->doorbell_index.sdma_engine[3] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void nv_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->enable_aspm) &&
+ amdgpu_device_should_use_aspm(adev))
+ adev->nbio.funcs->enable_aspm(adev, !enter);
+
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs nv_asic_funcs = {
+ .read_disabled_bios = &nv_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &nv_read_register,
+ .reset = &nv_asic_reset,
+ .reset_method = &nv_asic_reset_method,
+ .get_xclk = &nv_get_xclk,
+ .set_uvd_clocks = &nv_set_uvd_clocks,
+ .set_vce_clocks = &nv_set_vce_clocks,
+ .get_config_memsize = &nv_get_config_memsize,
+ .init_doorbell_index = &nv_init_doorbell_index,
+ .need_full_reset = &nv_need_full_reset,
+ .need_reset_on_init = &nv_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &nv_pre_asic_init,
+ .update_umd_stable_pstate = &nv_update_umd_stable_pstate,
+ .query_video_codecs = &nv_query_video_codecs,
+};
+
+static int nv_common_early_init(void *handle)
+{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ }
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &nv_didt_rreg;
+ adev->didt_wreg = &nv_didt_wreg;
+
+ adev->asic_funcs = &nv_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+ /* TODO: split the GC and PG flags based on the relevant IP version for which
+ * they are relevant.
+ */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 1, 10):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ case IP_VERSION(10, 1, 1):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 20;
+ break;
+ case IP_VERSION(10, 1, 2):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB;
+ /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+ * as a consequence, the rev_id and external_rev_id are wrong.
+ * workaround it by hardcoding rev_id to 0 (default value).
+ */
+ if (amdgpu_sriov_vf(adev))
+ adev->rev_id = 0;
+ adev->external_rev_id = adev->rev_id + 0xa;
+ break;
+ case IP_VERSION(10, 3, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ if (amdgpu_sriov_vf(adev)) {
+ /* hypervisor control CG and PG enablement */
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ }
+ adev->external_rev_id = adev->rev_id + 0x28;
+ break;
+ case IP_VERSION(10, 3, 2):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x32;
+ break;
+ case IP_VERSION(10, 3, 1):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ if (adev->apu_flags & AMD_APU_IS_VANGOGH)
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 3, 4):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
+ case IP_VERSION(10, 3, 5):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x46;
+ break;
+ case IP_VERSION(10, 3, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ if (adev->pdev->device == 0x1681)
+ adev->external_rev_id = 0x20;
+ else
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x82;
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x01;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG);
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int nv_common_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_sc_video_codecs_encode_array,
+ ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
+ sriov_sc_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0));
+ }
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int nv_common_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int nv_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->nbio.funcs->apply_lc_spc_mode_wa)
+ adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
+
+ if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
+ adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
+
+ /* enable aspm */
+ nv_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->remap_hdp_registers(adev);
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int nv_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because nv_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ return 0;
+}
+
+static int nv_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return nv_common_hw_fini(adev);
+}
+
+static int nv_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return nv_common_hw_init(adev);
+}
+
+static bool nv_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int nv_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 3, 1):
+ case IP_VERSION(2, 3, 2):
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 3, 3):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->smuio.funcs->update_rom_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int nv_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* TODO */
+ return 0;
+}
+
+static void nv_common_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ adev->smuio.funcs->get_clock_gating_state(adev, flags);
+
+ return;
+}
+
+static const struct amd_ip_funcs nv_common_ip_funcs = {
+ .name = "nv_common",
+ .early_init = nv_common_early_init,
+ .late_init = nv_common_late_init,
+ .sw_init = nv_common_sw_init,
+ .sw_fini = nv_common_sw_fini,
+ .hw_init = nv_common_hw_init,
+ .hw_fini = nv_common_hw_fini,
+ .suspend = nv_common_suspend,
+ .resume = nv_common_resume,
+ .is_idle = nv_common_is_idle,
+ .wait_for_idle = nv_common_wait_for_idle,
+ .soft_reset = nv_common_soft_reset,
+ .set_clockgating_state = nv_common_set_clockgating_state,
+ .set_powergating_state = nv_common_set_powergating_state,
+ .get_clockgating_state = nv_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
new file mode 100644
index 000000000..83e9782ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV_H__
+#define __NV_H__
+
+#include "nbio_v2_3.h"
+
+extern const struct amdgpu_ip_block_version nv_common_ip_block;
+
+void nv_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+void nv_set_virt_ops(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
new file mode 100644
index 000000000..631dafb92
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef NVD_H
+#define NVD_H
+
+/**
+ * Navi's PM4 definitions
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_INDIRECT_BUFFER_END 0x17
+#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
+#define PACKET3_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_COND_INDIRECT_BUFFER 0x3F
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
+#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
+#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
+#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
+#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
+#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
+#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
+#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
+#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
+#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
+#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
+#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
+#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - cache_policy__me_release_mem__lru
+ * 1 - cache_policy__me_release_mem__stream
+ * 2 - cache_policy__me_release_mem__noa
+ * 3 - cache_policy__me_release_mem__bypass
+ */
+#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
+
+#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+
+
+
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [31:26] | BYTE_COUNT [25:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_CONTEXT_REG_RMW 0x51
+#define PACKET3_GFX_CNTX_UPDATE 0x52
+#define PACKET3_BLK_CNTX_UPDATE 0x53
+#define PACKET3_INCR_UPDT_STATE 0x55
+#define PACKET3_ACQUIRE_MEM 0x58
+/* 1. HEADER
+ * 2. COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 2. COHER_SIZE [31:0]
+ * 3. COHER_SIZE_HI [7:0]
+ * 4. COHER_BASE_LO [31:0]
+ * 5. COHER_BASE_HI [23:0]
+ * 7. POLL_INTERVAL [15:0]
+ * 8. GCR_CNTL [18:0]
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
+ /*
+ * 0:NOP
+ * 1:ALL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
+ /*
+ * 0:ALL
+ * 1:reserved
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
+ /*
+ * 0:ALL
+ * 1:VOL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
+ /*
+ * 0: PARALLEL
+ * 1: FORWARD
+ * 2: REVERSE
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_REWIND 0x59
+#define PACKET3_INTERRUPT 0x5A
+#define PACKET3_GEN_PDEPTE 0x5B
+#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
+#define PACKET3_PRIME_UTCL2 0x5D
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_LOAD_COMPUTE_STATE 0x62
+#define PACKET3_LOAD_SH_REG_INDEX 0x63
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
+#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
+#define PACKET3_SET_SH_REG_DI 0x72
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG_DI_MULTI 0x74
+#define PACKET3_GFX_PIPE_LOCK 0x75
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
+#define PACKET3_FORWARD_HEADER 0x7C
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
+#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
+#define PACKET3_DISPATCH_DRAW 0x8D
+#define PACKET3_DISPATCH_DRAW_ACE 0x8D
+#define PACKET3_GET_LOD_STATS 0x8E
+#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
+#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_TMZ (1 << 0)
+# define FRAME_CMD(x) ((x) << 28)
+ /*
+ * x=0: tmz_begin
+ * x=1: tmz_end
+ */
+#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
+#define PACKET3_WAIT_REG_MEM64 0x93
+#define PACKET3_COND_PREEMPT 0x94
+#define PACKET3_HDP_FLUSH 0x95
+#define PACKET3_COPY_DATA_RB 0x96
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+#define PACKET3_AQL_PACKET 0x99
+#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
+#define PACKET3_SET_SH_REG_INDEX 0x9B
+#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
+#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
+#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
+#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
+#define PACKET3_SET_RESOURCES 0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
+# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
+#define PACKET3_MAP_PROCESS 0xA1
+#define PACKET3_MAP_QUEUES 0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
+# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
+# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
+# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
+# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
+# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2 */
+# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
+# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+#define PACKET3_UNMAP_QUEUES 0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
+ /* 0 - PREEMPT_QUEUES
+ * 1 - RESET_QUEUES
+ * 2 - DISABLE_PROCESS_QUEUES
+ * 3 - PREEMPT_QUEUES_NO_UNMAP
+ */
+# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2a */
+# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
+/* CONTROL4 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define PACKET3_QUERY_STATUS 0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
+# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
+# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
+/* CONTROL2a */
+# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_LIST 0xA5
+#define PACKET3_MAP_PROCESS_VM 0xA6
+/* GFX11 */
+#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
+# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
+# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
new file mode 100644
index 000000000..18917df78
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -0,0 +1,466 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _PSP_TEE_GFX_IF_H_
+#define _PSP_TEE_GFX_IF_H_
+
+#define PSP_GFX_CMD_BUF_VERSION 0x00000001
+
+#define GFX_CMD_STATUS_MASK 0x0000FFFF
+#define GFX_CMD_ID_MASK 0x000F0000
+#define GFX_CMD_RESERVED_MASK 0x7FF00000
+#define GFX_CMD_RESPONSE_MASK 0x80000000
+
+/* USBC PD FW version retrieval command */
+#define C2PMSG_CMD_GFX_USB_PD_FW_VER 0x2000000
+
+/* TEE Gfx Command IDs for the register interface.
+* Command ID must be between 0x00010000 and 0x000F0000.
+*/
+enum psp_gfx_crtl_cmd_id
+{
+ GFX_CTRL_CMD_ID_INIT_RBI_RING = 0x00010000, /* initialize RBI ring */
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
+ GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
+ GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */
+ GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
+ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */
+ GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */
+
+ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
+};
+
+
+/*-----------------------------------------------------------------------------
+ NOTE: All physical addresses used in this interface are actually
+ GPU Virtual Addresses.
+*/
+
+
+/* Control registers of the TEE Gfx interface. These are located in
+* SRBM-to-PSP mailbox registers (total 8 registers).
+*/
+struct psp_gfx_ctrl
+{
+ volatile uint32_t cmd_resp; /* +0 Command/Response register for Gfx commands */
+ volatile uint32_t rbi_wptr; /* +4 Write pointer (index) of RBI ring */
+ volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
+ volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
+ volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
+ volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/
+ volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual of ring buffer (VMID=0) */
+ volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
+
+};
+
+
+/* Response flag is set in the command when command is completed by PSP.
+* Used in the GFX_CTRL.CmdResp.
+* When PSP GFX I/F is initialized, the flag is set.
+*/
+#define GFX_FLAG_RESPONSE 0x80000000
+
+/* TEE Gfx Command IDs for the ring buffer interface. */
+enum psp_gfx_cmd_id
+{
+ GFX_CMD_ID_LOAD_TA = 0x00000001, /* load TA */
+ GFX_CMD_ID_UNLOAD_TA = 0x00000002, /* unload TA */
+ GFX_CMD_ID_INVOKE_CMD = 0x00000003, /* send command to TA */
+ GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
+ GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
+ GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
+ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
+ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
+ GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
+ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
+ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
+ GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */
+ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
+ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
+ GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
+ GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
+ GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
+};
+
+/* PSP boot config sub-commands */
+enum psp_gfx_boot_config_cmd
+{
+ BOOTCFG_CMD_SET = 1, /* Set boot configuration settings */
+ BOOTCFG_CMD_GET = 2, /* Get boot configuration settings */
+ BOOTCFG_CMD_INVALIDATE = 3 /* Reset current boot configuration settings to VBIOS defaults */
+};
+
+/* PSP boot config bitmask values */
+enum psp_gfx_boot_config
+{
+ BOOT_CONFIG_GECC = 0x1,
+};
+
+/* Command to load Trusted Application binary into PSP OS. */
+struct psp_gfx_cmd_load_ta
+{
+ uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+ uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
+ uint32_t app_len; /* length of the TA binary in bytes */
+ uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
+ uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */
+
+ /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
+ * for the TA. Each InvokeCommand can have dinamically mapped CMD buffer instead
+ * of using global persistent buffer.
+ */
+};
+
+
+/* Command to Unload Trusted Application binary from PSP OS. */
+struct psp_gfx_cmd_unload_ta
+{
+ uint32_t session_id; /* Session ID of the loaded TA to be unloaded */
+
+};
+
+
+/* Shared buffers for InvokeCommand.
+*/
+struct psp_gfx_buf_desc
+{
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */
+ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
+
+};
+
+/* Max number of descriptors for one shared buffer (in how many different
+* physical locations one shared buffer can be stored). If buffer is too much
+* fragmented, error will be returned.
+*/
+#define GFX_BUF_MAX_DESC 64
+
+struct psp_gfx_buf_list
+{
+ uint32_t num_desc; /* number of buffer descriptors in the list */
+ uint32_t total_size; /* total size of all buffers in the list in bytes (must be multiple of 4 KB) */
+ struct psp_gfx_buf_desc buf_desc[GFX_BUF_MAX_DESC]; /* list of buffer descriptors */
+
+ /* total 776 bytes */
+};
+
+/* Command to execute InvokeCommand entry point of the TA. */
+struct psp_gfx_cmd_invoke_cmd
+{
+ uint32_t session_id; /* Session ID of the TA to be executed */
+ uint32_t ta_cmd_id; /* Command ID to be sent to TA */
+ struct psp_gfx_buf_list buf; /* one indirect buffer (scatter/gather list) */
+
+};
+
+
+/* Command to setup TMR region. */
+struct psp_gfx_cmd_setup_tmr
+{
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
+ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
+ union {
+ struct {
+ uint32_t sriov_enabled:1; /* whether the device runs under SR-IOV*/
+ uint32_t virt_phy_addr:1; /* driver passes both virtual and physical address to PSP*/
+ uint32_t reserved:30;
+ } bitfield;
+ uint32_t tmr_flags;
+ };
+ uint32_t system_phy_addr_lo; /* bits [31:0] of system physical address of TMR buffer (must be 4 KB aligned) */
+ uint32_t system_phy_addr_hi; /* bits [63:32] of system physical address of TMR buffer */
+
+};
+
+/* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */
+enum psp_gfx_fw_type {
+ GFX_FW_TYPE_NONE = 0, /* */
+ GFX_FW_TYPE_CP_ME = 1, /* CP-ME VG + RV */
+ GFX_FW_TYPE_CP_PFP = 2, /* CP-PFP VG + RV */
+ GFX_FW_TYPE_CP_CE = 3, /* CP-CE VG + RV */
+ GFX_FW_TYPE_CP_MEC = 4, /* CP-MEC FW VG + RV */
+ GFX_FW_TYPE_CP_MEC_ME1 = 5, /* CP-MEC Jump Table 1 VG + RV */
+ GFX_FW_TYPE_CP_MEC_ME2 = 6, /* CP-MEC Jump Table 2 VG */
+ GFX_FW_TYPE_RLC_V = 7, /* RLC-V VG */
+ GFX_FW_TYPE_RLC_G = 8, /* RLC-G VG + RV */
+ GFX_FW_TYPE_SDMA0 = 9, /* SDMA0 VG + RV */
+ GFX_FW_TYPE_SDMA1 = 10, /* SDMA1 VG */
+ GFX_FW_TYPE_DMCU_ERAM = 11, /* DMCU-ERAM VG + RV */
+ GFX_FW_TYPE_DMCU_ISR = 12, /* DMCU-ISR VG + RV */
+ GFX_FW_TYPE_VCN = 13, /* VCN RV */
+ GFX_FW_TYPE_UVD = 14, /* UVD VG */
+ GFX_FW_TYPE_VCE = 15, /* VCE VG */
+ GFX_FW_TYPE_ISP = 16, /* ISP RV */
+ GFX_FW_TYPE_ACP = 17, /* ACP RV */
+ GFX_FW_TYPE_SMU = 18, /* SMU VG */
+ GFX_FW_TYPE_MMSCH = 19, /* MMSCH VG */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, /* RLC GPM VG + RV */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, /* RLC SRM VG + RV */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, /* RLC CNTL VG + RV */
+ GFX_FW_TYPE_UVD1 = 23, /* UVD1 VG-20 */
+ GFX_FW_TYPE_TOC = 24, /* TOC NV-10 */
+ GFX_FW_TYPE_RLC_P = 25, /* RLC P NV */
+ GFX_FW_TYPE_RLC_IRAM = 26, /* RLC_IRAM NV */
+ GFX_FW_TYPE_GLOBAL_TAP_DELAYS = 27, /* GLOBAL TAP DELAYS NV */
+ GFX_FW_TYPE_SE0_TAP_DELAYS = 28, /* SE0 TAP DELAYS NV */
+ GFX_FW_TYPE_SE1_TAP_DELAYS = 29, /* SE1 TAP DELAYS NV */
+ GFX_FW_TYPE_GLOBAL_SE0_SE1_SKEW_DELAYS = 30, /* GLOBAL SE0/1 SKEW DELAYS NV */
+ GFX_FW_TYPE_SDMA0_JT = 31, /* SDMA0 JT NV */
+ GFX_FW_TYPE_SDMA1_JT = 32, /* SDNA1 JT NV */
+ GFX_FW_TYPE_CP_MES = 33, /* CP MES NV */
+ GFX_FW_TYPE_MES_STACK = 34, /* MES STACK NV */
+ GFX_FW_TYPE_RLC_SRM_DRAM_SR = 35, /* RLC SRM DRAM NV */
+ GFX_FW_TYPE_RLCG_SCRATCH_SR = 36, /* RLCG SCRATCH NV */
+ GFX_FW_TYPE_RLCP_SCRATCH_SR = 37, /* RLCP SCRATCH NV */
+ GFX_FW_TYPE_RLCV_SCRATCH_SR = 38, /* RLCV SCRATCH NV */
+ GFX_FW_TYPE_RLX6_DRAM_SR = 39, /* RLX6 DRAM NV */
+ GFX_FW_TYPE_SDMA0_PG_CONTEXT = 40, /* SDMA0 PG CONTEXT NV */
+ GFX_FW_TYPE_SDMA1_PG_CONTEXT = 41, /* SDMA1 PG CONTEXT NV */
+ GFX_FW_TYPE_GLOBAL_MUX_SELECT_RAM = 42, /* GLOBAL MUX SEL RAM NV */
+ GFX_FW_TYPE_SE0_MUX_SELECT_RAM = 43, /* SE0 MUX SEL RAM NV */
+ GFX_FW_TYPE_SE1_MUX_SELECT_RAM = 44, /* SE1 MUX SEL RAM NV */
+ GFX_FW_TYPE_ACCUM_CTRL_RAM = 45, /* ACCUM CTRL RAM NV */
+ GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */
+ GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */
+ GFX_FW_TYPE_RLC_DRAM_BOOT = 48, /* RLC DRAM BOOT NV */
+ GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */
+ GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */
+ GFX_FW_TYPE_DMUB = 51, /* DMUB RN */
+ GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */
+ GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */
+ GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */
+ GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */
+ GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
+ GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
+ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
+ GFX_FW_TYPE_CAP = 62, /* CAP_FW */
+ GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */
+ GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */
+ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
+ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
+ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */
+ GFX_FW_TYPE_LSDMA = 70, /* LSDMA FW SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH0 = 71, /* SDMA Thread 0/CTX SOC21 */
+ GFX_FW_TYPE_SDMA_UCODE_TH1 = 72, /* SDMA Thread 1/CTL SOC21 */
+ GFX_FW_TYPE_PPTABLE = 73, /* PPTABLE SOC21 */
+ GFX_FW_TYPE_DISCRETE_USB4 = 74, /* dUSB4 FW SOC21 */
+ GFX_FW_TYPE_TA = 75, /* SRIOV TA FW UUID SOC21 */
+ GFX_FW_TYPE_RS64_MES = 76, /* RS64 MES ucode SOC21 */
+ GFX_FW_TYPE_RS64_MES_STACK = 77, /* RS64 MES stack ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ = 78, /* RS64 KIQ ucode SOC21 */
+ GFX_FW_TYPE_RS64_KIQ_STACK = 79, /* RS64 KIQ Heap stack SOC21 */
+ GFX_FW_TYPE_ISP_DATA = 80, /* ISP DATA SOC21 */
+ GFX_FW_TYPE_CP_MES_KIQ = 81, /* MES KIQ ucode SOC21 */
+ GFX_FW_TYPE_MES_KIQ_STACK = 82, /* MES KIQ stack SOC21 */
+ GFX_FW_TYPE_UMSCH_DATA = 83, /* User Mode Scheduler Data SOC21 */
+ GFX_FW_TYPE_UMSCH_UCODE = 84, /* User Mode Scheduler Ucode SOC21 */
+ GFX_FW_TYPE_UMSCH_CMD_BUFFER = 85, /* User Mode Scheduler Command Buffer SOC21 */
+ GFX_FW_TYPE_USB_DP_COMBO_PHY = 86, /* USB-Display port Combo SOC21 */
+ GFX_FW_TYPE_RS64_PFP = 87, /* RS64 PFP SOC21 */
+ GFX_FW_TYPE_RS64_ME = 88, /* RS64 ME SOC21 */
+ GFX_FW_TYPE_RS64_MEC = 89, /* RS64 MEC SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P0_STACK = 90, /* RS64 PFP stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_PFP_P1_STACK = 91, /* RS64 PFP stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P0_STACK = 92, /* RS64 ME stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_ME_P1_STACK = 93, /* RS64 ME stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P0_STACK = 94, /* RS64 MEC stack P0 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
+ GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
+ GFX_FW_TYPE_MAX
+};
+
+/* Command to load HW IP FW. */
+struct psp_gfx_cmd_load_ip_fw
+{
+ uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+ uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
+ uint32_t fw_size; /* FW buffer size in bytes */
+ enum psp_gfx_fw_type fw_type; /* FW type */
+
+};
+
+/* Command to save/restore HW IP FW. */
+struct psp_gfx_cmd_save_restore_ip_fw
+{
+ uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/
+ uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
+ uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
+ uint32_t buf_size; /* Size of the save/restore buffer in bytes */
+ enum psp_gfx_fw_type fw_type; /* FW type */
+};
+
+/* Command to setup register program */
+struct psp_gfx_cmd_reg_prog {
+ uint32_t reg_value;
+ uint32_t reg_id;
+};
+
+/* Command to load TOC */
+struct psp_gfx_cmd_load_toc
+{
+ uint32_t toc_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+ uint32_t toc_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
+ uint32_t toc_size; /* FW buffer size in bytes */
+};
+
+/* Dynamic boot configuration */
+struct psp_gfx_cmd_boot_cfg
+{
+ uint32_t timestamp; /* calendar time as number of seconds */
+ enum psp_gfx_boot_config_cmd sub_cmd; /* sub-command indicating how to process command data */
+ uint32_t boot_config; /* dynamic boot configuration bitmask */
+ uint32_t boot_config_valid; /* dynamic boot configuration valid bits bitmask */
+};
+
+struct psp_gfx_cmd_sriov_spatial_part {
+ uint32_t mode;
+ uint32_t override_ips;
+ uint32_t override_xcds_avail;
+ uint32_t override_this_aid;
+};
+
+/* All GFX ring buffer commands. */
+union psp_gfx_commands
+{
+ struct psp_gfx_cmd_load_ta cmd_load_ta;
+ struct psp_gfx_cmd_unload_ta cmd_unload_ta;
+ struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd;
+ struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
+ struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
+ struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
+ struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
+ struct psp_gfx_cmd_setup_tmr cmd_setup_vmr;
+ struct psp_gfx_cmd_load_toc cmd_load_toc;
+ struct psp_gfx_cmd_boot_cfg boot_cfg;
+ struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
+};
+
+struct psp_gfx_uresp_reserved
+{
+ uint32_t reserved[8];
+};
+
+/* Command-specific response for Fw Attestation Db */
+struct psp_gfx_uresp_fwar_db_info
+{
+ uint32_t fwar_db_addr_lo;
+ uint32_t fwar_db_addr_hi;
+};
+
+/* Command-specific response for boot config. */
+struct psp_gfx_uresp_bootcfg {
+ uint32_t boot_cfg; /* boot config data */
+};
+
+/* Union of command-specific responses for GPCOM ring. */
+union psp_gfx_uresp {
+ struct psp_gfx_uresp_reserved reserved;
+ struct psp_gfx_uresp_bootcfg boot_cfg;
+ struct psp_gfx_uresp_fwar_db_info fwar_db_info;
+};
+
+/* Structure of GFX Response buffer.
+* For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI
+* it is separate buffer.
+*/
+struct psp_gfx_resp
+{
+ uint32_t status; /* +0 status of command execution */
+ uint32_t session_id; /* +4 session ID in response to LoadTa command */
+ uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */
+ uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */
+ uint32_t tmr_size; /* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */
+
+ uint32_t reserved[11];
+
+ union psp_gfx_uresp uresp; /* +64 response union containing command-specific responses */
+
+ /* total 96 bytes */
+};
+
+/* Structure of Command buffer pointed by psp_gfx_rb_frame.cmd_buf_addr_hi
+* and psp_gfx_rb_frame.cmd_buf_addr_lo.
+*/
+struct psp_gfx_cmd_resp
+{
+ uint32_t buf_size; /* +0 total size of the buffer in bytes */
+ uint32_t buf_version; /* +4 version of the buffer strusture; must be PSP_GFX_CMD_BUF_VERSION */
+ uint32_t cmd_id; /* +8 command ID */
+
+ /* These fields are used for RBI only. They are all 0 in GPCOM commands
+ */
+ uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
+ uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */
+ uint32_t resp_offset; /* +20 offset within response buffer */
+ uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
+
+ union psp_gfx_commands cmd; /* +28 command specific structures */
+
+ uint8_t reserved_1[864 - sizeof(union psp_gfx_commands) - 28];
+
+ /* Note: Resp is part of this buffer for GPCOM ring. For RBI ring the response
+ * is separate buffer pointed by resp_buf_addr_hi and resp_buf_addr_lo.
+ */
+ struct psp_gfx_resp resp; /* +864 response */
+
+ uint8_t reserved_2[1024 - 864 - sizeof(struct psp_gfx_resp)];
+
+ /* total size 1024 bytes */
+};
+
+
+#define FRAME_TYPE_DESTROY 1 /* frame sent by KMD driver when UMD Scheduler context is destroyed*/
+
+/* Structure of the Ring Buffer Frame */
+struct psp_gfx_rb_frame
+{
+ uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */
+ uint32_t cmd_buf_size; /* +8 command buffer size in bytes */
+ uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
+ uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
+ uint32_t fence_value; /* +20 Fence value */
+ uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */
+ uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */
+ uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */
+ uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */
+ uint8_t reserved1[2]; /* +34 reserved, must be 0 */
+ uint32_t reserved2[7]; /* +36 reserved, must be 0 */
+ /* total 64 bytes */
+};
+
+#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
+
+enum tee_error_code {
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+};
+
+#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
new file mode 100644
index 000000000..5f10883da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v10_0.h"
+
+#include "mp/mp_10_0_offset.h"
+#include "gc/gc_9_1_offset.h"
+#include "sdma0/sdma0_4_1_offset.h"
+
+MODULE_FIRMWARE("amdgpu/raven_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
+MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
+
+static int psp_v10_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 1, 0)) &&
+ (adev->pdev->revision == 0xa1) &&
+ (psp->securedisplay_context.context.bin_desc.fw_version >= 0x27000008)) {
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
+ }
+ return err;
+}
+
+static int psp_v10_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* There might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+
+ return ret;
+}
+
+static int psp_v10_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command to C2PMSG_64 */
+ psp_ring_reg = 3 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* There might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v10_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v10_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static int psp_v10_0_mode1_reset(struct psp_context *psp)
+{
+ DRM_INFO("psp mode 1 reset not supported now! \n");
+ return -EINVAL;
+}
+
+static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+}
+
+static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v10_0_funcs = {
+ .init_microcode = psp_v10_0_init_microcode,
+ .ring_create = psp_v10_0_ring_create,
+ .ring_stop = psp_v10_0_ring_stop,
+ .ring_destroy = psp_v10_0_ring_destroy,
+ .mode1_reset = psp_v10_0_mode1_reset,
+ .ring_get_wptr = psp_v10_0_ring_get_wptr,
+ .ring_set_wptr = psp_v10_0_ring_set_wptr,
+};
+
+void psp_v10_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v10_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
new file mode 100644
index 000000000..20c2a9485
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __PSP_V10_0_H__
+#define __PSP_V10_0_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v10_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
new file mode 100644
index 000000000..8f84fe40a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -0,0 +1,667 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v11_0.h"
+
+#include "mp/mp_11_0_offset.h"
+#include "mp/mp_11_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_7_4_offset.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi10_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi12_cap.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_cap.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_asd.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_toc.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sos.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_ta.bin");
+
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
+/* navi10 reg offset define */
+#define mmRLC_GPM_UCODE_ADDR_NV10 0x5b61
+#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62
+#define mmSDMA0_UCODE_ADDR_NV10 0x5880
+#define mmSDMA0_UCODE_DATA_NV10 0x5881
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+/* For large FW files the time to complete can be very long */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+static int psp_v11_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
+ break;
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(11, 5, 0):
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ break;
+ default:
+ BUG();
+ }
+
+ return err;
+}
+
+static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000,
+ 0x80000000,
+ false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v11_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v11_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v11_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ /* Copy PSP System Driver binary to memory */
+ psp_copy_fw(psp, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the sys driver to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v11_0_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v11_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v11_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v11_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v11_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v11_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ /* Copy Secure OS binary to PSP memory */
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static int psp_v11_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (amdgpu_sriov_vf(adev))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v11_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ring->ring_wptr = 0;
+ ret = psp_v11_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+
+static int psp_v11_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v11_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static int psp_v11_0_mode1_reset(struct psp_context *psp)
+{
+ int ret;
+ uint32_t offset;
+ struct amdgpu_device *adev = psp->adev;
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+ if (ret) {
+ DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ return -EINVAL;
+ }
+
+ /*send the mode 1 reset command*/
+ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+ msleep(500);
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+ if (ret) {
+ DRM_INFO("psp mode 1 reset failed!\n");
+ return -EINVAL;
+ }
+
+ DRM_INFO("psp mode1 reset succeed \n");
+
+ return 0;
+}
+
+static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ DRM_DEBUG("training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+/*
+ * save and restore process
+ */
+static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ DRM_DEBUG("Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ DRM_ERROR("Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v11_0_is_sos_alive(psp)) {
+ DRM_DEBUG("SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ DRM_DEBUG("Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ DRM_DEBUG("Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ DRM_ERROR("failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ DRM_ERROR("send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = psp->km_ring.ring_wptr;
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ psp->km_ring.ring_wptr = value;
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t reg_status;
+ int ret, i = 0;
+
+ /*
+ * LFB address which is aligned to 1MB address and has to be
+ * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+ * register
+ */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Fireup interrupt so PSP can pick up the address */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+ /* FW load takes very long time */
+ do {
+ msleep(1000);
+ reg_status = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35);
+
+ if (reg_status & 0x80000000)
+ goto done;
+
+ } while (++i < USBC_PD_POLLING_LIMIT_S);
+
+ return -ETIME;
+done:
+
+ if ((reg_status & 0xFFFF) != 0) {
+ DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = 0x%04x\n",
+ reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (!ret)
+ *fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36);
+
+ return ret;
+}
+
+static const struct psp_funcs psp_v11_0_funcs = {
+ .init_microcode = psp_v11_0_init_microcode,
+ .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v11_0_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v11_0_bootloader_load_sos,
+ .ring_create = psp_v11_0_ring_create,
+ .ring_stop = psp_v11_0_ring_stop,
+ .ring_destroy = psp_v11_0_ring_destroy,
+ .mode1_reset = psp_v11_0_mode1_reset,
+ .mem_training = psp_v11_0_memory_training,
+ .ring_get_wptr = psp_v11_0_ring_get_wptr,
+ .ring_set_wptr = psp_v11_0_ring_set_wptr,
+ .load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
+ .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+};
+
+void psp_v11_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v11_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h
new file mode 100644
index 000000000..082c16c88
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V11_0_H__
+#define __PSP_V11_0_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v11_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
new file mode 100644
index 000000000..5697760a8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v11_0_8.h"
+
+#include "mp/mp_11_0_8_offset.h"
+
+static int psp_v11_0_8_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
+
+ return ret;
+}
+
+static int psp_v11_0_8_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v11_0_8_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v11_0_8_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v11_0_8_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v11_0_8_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v11_0_8_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v11_0_8_funcs = {
+ .ring_create = psp_v11_0_8_ring_create,
+ .ring_stop = psp_v11_0_8_ring_stop,
+ .ring_destroy = psp_v11_0_8_ring_destroy,
+ .ring_get_wptr = psp_v11_0_8_ring_get_wptr,
+ .ring_set_wptr = psp_v11_0_8_ring_set_wptr,
+};
+
+void psp_v11_0_8_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v11_0_8_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h
new file mode 100644
index 000000000..890377a5a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V11_0_8_H__
+#define __PSP_V11_0_8_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v11_0_8_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
new file mode 100644
index 000000000..fcd708eae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v12_0.h"
+
+#include "mp/mp_12_0_0_offset.h"
+#include "mp/mp_12_0_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_7_4_offset.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
+MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");
+
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
+
+static int psp_v12_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+
+ /* only supported on renoir */
+ if (!(adev->apu_flags & AMD_APU_IS_RENOIR))
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
+
+ return 0;
+}
+
+static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Copy PSP System Driver binary to memory */
+ psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
+
+ /* Provide the sys driver to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 1 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Copy Secure OS binary to PSP memory */
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 2 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static void psp_v12_0_reroute_ih(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t tmp;
+
+ /* Change IH ring for VMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+
+ /* Change IH ring for UMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+}
+
+static int psp_v12_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ psp_v12_0_reroute_ih(psp);
+
+ if (amdgpu_sriov_vf(psp->adev)) {
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v12_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (amdgpu_sriov_vf(adev))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v12_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v12_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static int psp_v12_0_mode1_reset(struct psp_context *psp)
+{
+ int ret;
+ uint32_t offset;
+ struct amdgpu_device *adev = psp->adev;
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+ if (ret) {
+ DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ return -EINVAL;
+ }
+
+ /*send the mode 1 reset command*/
+ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+ msleep(500);
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+ if (ret) {
+ DRM_INFO("psp mode 1 reset failed!\n");
+ return -EINVAL;
+ }
+
+ DRM_INFO("psp mode1 reset succeed \n");
+
+ return 0;
+}
+
+static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v12_0_funcs = {
+ .init_microcode = psp_v12_0_init_microcode,
+ .bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v12_0_bootloader_load_sos,
+ .ring_create = psp_v12_0_ring_create,
+ .ring_stop = psp_v12_0_ring_stop,
+ .ring_destroy = psp_v12_0_ring_destroy,
+ .mode1_reset = psp_v12_0_mode1_reset,
+ .ring_get_wptr = psp_v12_0_ring_get_wptr,
+ .ring_set_wptr = psp_v12_0_ring_set_wptr,
+};
+
+void psp_v12_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v12_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h
new file mode 100644
index 000000000..241693ab1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V12_0_H__
+#define __PSP_V12_0_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v12_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
new file mode 100644
index 000000000..fe1995ed1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -0,0 +1,782 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0.h"
+
+#include "mp/mp_13_0_2_offset.h"
+#include "mp/mp_13_0_2_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
+
+/* For large FW files the time to complete can be very long */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+/* Retry times for vmbx ready wait */
+#define PSP_VMBX_POLLING_LIMIT 3000
+
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+static int psp_v13_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 2):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ /* It's not necessary to load ras ta on Guest side */
+ if (!amdgpu_sriov_vf(adev)) {
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ }
+ break;
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ /* It's not necessary to load ras ta on Guest side */
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, ret;
+
+ for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_33 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+ 0x80000000, 0xffffffff, false);
+
+ if (ret == 0)
+ break;
+ }
+
+ if (ret)
+ dev_warn(adev->dev, "Bootloader wait timed out");
+
+ return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, retry_cnt, ret;
+
+ retry_cnt =
+ (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) ?
+ PSP_VMBX_POLLING_LIMIT :
+ 10;
+ /* Wait for bootloader to signify that it is ready having bit 31 of
+ * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+ * If there is an error in processing command, bits[7:0] will be set.
+ * This is applicable for PSP v13.0.6 and newer.
+ */
+ for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0xffffffff, false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) {
+ psp_v13_0_wait_for_vmbx_ready(psp);
+
+ return psp_v13_0_wait_for_bootloader(psp);
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP KDB binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the PSP KDB to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v13_0_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v13_0_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v13_0_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+}
+
+static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
+}
+
+
+static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static int psp_v13_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v13_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v13_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v13_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+}
+
+static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+
+static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ dev_dbg(adev->dev, "Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ dev_err(adev->dev, "Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v13_0_is_sos_alive(psp)) {
+ dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ dev_dbg(adev->dev, "Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ dev_err(adev->dev, "failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v13_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v13_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ dev_err(adev->dev, "send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t reg_status;
+ int ret, i = 0;
+
+ /*
+ * LFB address which is aligned to 1MB address and has to be
+ * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P
+ * register
+ */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Fireup interrupt so PSP can pick up the address */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+ /* FW load takes very long time */
+ do {
+ msleep(1000);
+ reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35);
+
+ if (reg_status & 0x80000000)
+ goto done;
+
+ } while (++i < USBC_PD_POLLING_LIMIT_S);
+
+ return -ETIME;
+done:
+
+ if ((reg_status & 0xFFFF) != 0) {
+ DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n",
+ reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (!ret)
+ *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36);
+
+ return ret;
+}
+
+static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+ uint32_t reg_status = 0, reg_val = 0;
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+ reg_val |= (cmd << 16);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115, reg_val);
+
+ /* Ring the doorbell */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
+
+ if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+ ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ if (ret) {
+ dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
+ return ret;
+ }
+
+ reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
+ if ((reg_status & 0xFFFF) != 0) {
+ dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+ cmd, reg_status & 0xFFFF);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_update_spirom(struct psp_context *psp,
+ uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* Confirm PSP is ready to start */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ if (ret) {
+ dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
+ return ret;
+ }
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+ if (ret)
+ return ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+ if (ret)
+ return ret;
+
+ psp->vbflash_done = true;
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int psp_v13_0_vbflash_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115);
+}
+
+static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 10)) {
+ uint32_t reg_data;
+ /* MP1 fatal error: trigger PSP dram read to unhalt PSP
+ * during MP1 triggered sync flood.
+ */
+ reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, reg_data + 0x10);
+
+ /* delay 1000ms for the mode1 reset for fatal error
+ * to be recovered back.
+ */
+ msleep(1000);
+ }
+
+ return 0;
+}
+
+static const struct psp_funcs psp_v13_0_funcs = {
+ .init_microcode = psp_v13_0_init_microcode,
+ .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
+ .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
+ .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_sos = psp_v13_0_bootloader_load_sos,
+ .ring_create = psp_v13_0_ring_create,
+ .ring_stop = psp_v13_0_ring_stop,
+ .ring_destroy = psp_v13_0_ring_destroy,
+ .ring_get_wptr = psp_v13_0_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_ring_set_wptr,
+ .mem_training = psp_v13_0_memory_training,
+ .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
+ .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
+ .update_spirom = psp_v13_0_update_spirom,
+ .vbflash_stat = psp_v13_0_vbflash_status,
+ .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+};
+
+void psp_v13_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
new file mode 100644
index 000000000..de5677ce4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_H__
+#define __PSP_V13_0_H__
+
+#include "amdgpu_psp.h"
+
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
+void psp_v13_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
new file mode 100644
index 000000000..d5ba58eba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0_4.h"
+
+#include "mp/mp_13_0_4_offset.h"
+#include "mp/mp_13_0_4_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+
+static int psp_v13_0_4_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 4):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_4_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000,
+ 0x80000000,
+ false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP KDB binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the PSP KDB to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v13_0_4_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v13_0_4_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v13_0_4_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v13_0_4_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v13_0_4_funcs = {
+ .init_microcode = psp_v13_0_4_init_microcode,
+ .bootloader_load_kdb = psp_v13_0_4_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_4_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_4_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_4_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,
+ .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos,
+ .ring_create = psp_v13_0_4_ring_create,
+ .ring_stop = psp_v13_0_4_ring_stop,
+ .ring_destroy = psp_v13_0_4_ring_destroy,
+ .ring_get_wptr = psp_v13_0_4_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_4_ring_set_wptr,
+};
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_4_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
new file mode 100644
index 000000000..8547b8d51
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_4_H__
+#define __PSP_V13_0_4_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
new file mode 100644
index 000000000..f6b75e3e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v3_1.h"
+
+#include "mp/mp_9_0_offset.h"
+#include "mp/mp_9_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_6_1_offset.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega10_cap.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+
+
+#define smnMP1_FIRMWARE_FLAGS 0x3010028
+
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+
+static int psp_v3_1_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int err = 0;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = psp_init_sos_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+
+ err = psp_init_asd_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Copy PSP System Driver binary to memory */
+ psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes);
+
+ /* Provide the sys driver to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ /* Copy Secure OS binary to PSP memory */
+ psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ 0, true);
+ return ret;
+}
+
+static void psp_v3_1_reroute_ih(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t tmp;
+
+ /* Change IH ring for VMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+
+ /* Change IH ring for UMC */
+ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
+ tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
+
+ mdelay(20);
+ psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+}
+
+static int psp_v3_1_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ psp_v3_1_reroute_ih(psp);
+
+ if (amdgpu_sriov_vf(adev)) {
+ ring->ring_wptr = 0;
+ ret = psp_v3_1_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+ /* No size initialization for sriov */
+ /* Write the ring initialization command to C2PMSG_101 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_101), 0x80000000,
+ 0x8000FFFF, false);
+ } else {
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_64), 0x80000000,
+ 0x8000FFFF, false);
+
+ }
+ return ret;
+}
+
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (amdgpu_sriov_vf(adev))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v3_1_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v3_1_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t reg;
+
+ reg = RREG32_PCIE(smnMP1_FIRMWARE_FLAGS | 0x03b00000);
+ return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
+}
+
+static int psp_v3_1_mode1_reset(struct psp_context *psp)
+{
+ int ret;
+ uint32_t offset;
+ struct amdgpu_device *adev = psp->adev;
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+ if (ret) {
+ DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ return -EINVAL;
+ }
+
+ /*send the mode 1 reset command*/
+ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+ msleep(500);
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+ if (ret) {
+ DRM_INFO("psp mode 1 reset failed!\n");
+ return -EINVAL;
+ }
+
+ DRM_INFO("psp mode1 reset succeed \n");
+
+ return 0;
+}
+
+static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = psp->km_ring.ring_wptr;
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+ return data;
+}
+
+static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ /* send interrupt to PSP for SRIOV ring write pointer update */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ psp->km_ring.ring_wptr = value;
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v3_1_funcs = {
+ .init_microcode = psp_v3_1_init_microcode,
+ .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v3_1_bootloader_load_sos,
+ .ring_create = psp_v3_1_ring_create,
+ .ring_stop = psp_v3_1_ring_stop,
+ .ring_destroy = psp_v3_1_ring_destroy,
+ .smu_reload_quirk = psp_v3_1_smu_reload_quirk,
+ .mode1_reset = psp_v3_1_mode1_reset,
+ .ring_get_wptr = psp_v3_1_ring_get_wptr,
+ .ring_set_wptr = psp_v3_1_ring_set_wptr,
+};
+
+void psp_v3_1_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v3_1_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
new file mode 100644
index 000000000..e411e31ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __PSP_V3_1_H__
+#define __PSP_V3_1_H__
+
+#include "amdgpu_psp.h"
+
+enum { PSP_DIRECTORY_TABLE_ENTRIES = 4 };
+enum { PSP_BINARY_ALIGNMENT = 64 };
+enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 };
+enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 };
+
+void psp_v3_1_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_common.h b/drivers/gpu/drm/amd/amdgpu/sdma_common.h
new file mode 100644
index 000000000..8629ef7e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_common.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_COMMON_H__
+#define __SDMA_COMMON_H__
+
+enum sdma_utcl2_cache_read_policy {
+ CACHE_READ_POLICY_L2__LRU = 0x00000000,
+ CACHE_READ_POLICY_L2__STREAM = 0x00000001,
+ CACHE_READ_POLICY_L2__NOA = 0x00000002,
+ CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
+};
+
+enum sdma_utcl2_cache_write_policy {
+ CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
+ CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
+ CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
+ CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
+ CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
+};
+
+#endif /* __SDMA_COMMON_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
new file mode 100644
index 000000000..51afc9299
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -0,0 +1,1274 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "vi.h"
+#include "vid.h"
+
+#include "oss/oss_2_4_d.h"
+#include "oss/oss_2_4_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "iceland_sdma_pkt_open.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
+MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
+
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+{
+ SDMA0_REGISTER_OFFSET,
+ SDMA1_REGISTER_OFFSET
+};
+
+static const u32 golden_settings_iceland_a11[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
+};
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines. These engines are used for compute
+ * and gfx. There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has it's own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things. It also has support for tiling/detiling of
+ * buffers.
+ */
+
+static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ amdgpu_device_program_register_sequence(adev,
+ iceland_mgcg_cgcg_init,
+ ARRAY_SIZE(iceland_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_iceland_a11,
+ ARRAY_SIZE(golden_settings_iceland_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+}
+
+/**
+ * sdma_v2_4_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0, i;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ chip_name = "topaz";
+ break;
+ default: BUG();
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ if (err)
+ goto out;
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma.instance[i].feature_version >= 20)
+ adev->sdma.instance[i].burst_nop = true;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+out:
+ if (err) {
+ pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ }
+ return err;
+}
+
+/**
+ * sdma_v2_4_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (VI+).
+ */
+static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ /* XXX check if swapping is necessary on BE */
+ return *ring->rptr_cpu_addr >> 2;
+}
+
+/**
+ * sdma_v2_4_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VI+).
+ */
+static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
+
+ return wptr;
+}
+
+/**
+ * sdma_v2_4_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VI+).
+ */
+static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
+}
+
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (VI).
+ */
+static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+}
+
+/**
+ * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 ref_and_mask = 0;
+
+ if (ring->me == 0)
+ ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
+ else
+ ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (VI).
+ */
+static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+/**
+ * sdma_v2_4_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (VI).
+ */
+static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+ ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+}
+
+/**
+ * sdma_v2_4_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (VI).
+ */
+static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v2_4_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (VI).
+ */
+static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v2_4_gfx_stop(adev);
+ sdma_v2_4_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
+ if (enable)
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
+ else
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
+ }
+}
+
+/**
+ * sdma_v2_4_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ int i, j, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (j = 0; j < 16; j++) {
+ vi_srbm_select(adev, 0, 0, 0, j);
+ /* SDMA GFX */
+ WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
+ }
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
+ WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+
+ sdma_v2_4_enable(adev, true);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v2_4_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+ return 0;
+}
+
+
+/**
+ * sdma_v2_4_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v2_4_start(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* halt the engine before programing */
+ sdma_v2_4_enable(adev, false);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v2_4_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v2_4_rlc_resume(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * sdma_v2_4_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (VI).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+}
+
+/**
+ * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (CIK).
+ */
+static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = pe;
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
+}
+
+/**
+ * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VI).
+ */
+static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for flush */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* reference */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static int sdma_v2_4_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
+ sdma_v2_4_set_ring_funcs(adev);
+ sdma_v2_4_set_buffer_funcs(adev);
+ sdma_v2_4_set_vm_pte_funcs(adev);
+ sdma_v2_4_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v2_4_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ r = sdma_v2_4_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v2_4_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ sdma_v2_4_free_microcode(adev);
+ return 0;
+}
+
+static int sdma_v2_4_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v2_4_init_golden_registers(adev);
+
+ r = sdma_v2_4_start(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int sdma_v2_4_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v2_4_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v2_4_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v2_4_hw_fini(adev);
+}
+
+static int sdma_v2_4_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v2_4_hw_init(adev);
+}
+
+static bool sdma_v2_4_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int sdma_v2_4_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK);
+
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v2_4_soft_reset(void *handle)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
+ /* sdma0 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+ }
+ if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
+ /* sdma1 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
+ }
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ switch (type) {
+ case AMDGPU_SDMA_IRQ_INSTANCE0:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AMDGPU_SDMA_IRQ_INSTANCE1:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (instance_id) {
+ case 0:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ case 1:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+
+ if (instance_id <= 1 && queue_id == 0)
+ drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
+ return 0;
+}
+
+static int sdma_v2_4_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ /* XXX handled via the smc on VI */
+ return 0;
+}
+
+static int sdma_v2_4_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
+ .name = "sdma_v2_4",
+ .early_init = sdma_v2_4_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v2_4_sw_init,
+ .sw_fini = sdma_v2_4_sw_fini,
+ .hw_init = sdma_v2_4_hw_init,
+ .hw_fini = sdma_v2_4_hw_fini,
+ .suspend = sdma_v2_4_suspend,
+ .resume = sdma_v2_4_resume,
+ .is_idle = sdma_v2_4_is_idle,
+ .wait_for_idle = sdma_v2_4_wait_for_idle,
+ .soft_reset = sdma_v2_4_soft_reset,
+ .set_clockgating_state = sdma_v2_4_set_clockgating_state,
+ .set_powergating_state = sdma_v2_4_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = false,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v2_4_ring_get_rptr,
+ .get_wptr = sdma_v2_4_ring_get_wptr,
+ .set_wptr = sdma_v2_4_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v2_4_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v2_4_ring_emit_pipeline_sync */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
+ 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
+ .emit_ib = sdma_v2_4_ring_emit_ib,
+ .emit_fence = sdma_v2_4_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
+ .test_ring = sdma_v2_4_ring_test_ring,
+ .test_ib = sdma_v2_4_ring_test_ib,
+ .insert_nop = sdma_v2_4_ring_insert_nop,
+ .pad_ib = sdma_v2_4_ring_pad_ib,
+ .emit_wreg = sdma_v2_4_ring_emit_wreg,
+};
+
+static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
+ .set = sdma_v2_4_set_trap_irq_state,
+ .process = sdma_v2_4_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
+ .process = sdma_v2_4_process_illegal_inst_irq,
+};
+
+static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: unused
+ *
+ * Copy GPU buffers using the DMA engine (VI).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (VI).
+ */
+static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
+ .copy_max_bytes = 0x1fffff,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v2_4_emit_copy_buffer,
+
+ .fill_max_bytes = 0x1fffff,
+ .fill_num_dw = 7,
+ .emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
+};
+
+static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v2_4_vm_copy_pte,
+
+ .write_pte = sdma_v2_4_vm_write_pte,
+ .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
+};
+
+static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 2,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &sdma_v2_4_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h
new file mode 100644
index 000000000..28b433729
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V2_4_H__
+#define __SDMA_V2_4_H__
+
+extern const struct amdgpu_ip_block_version sdma_v2_4_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
new file mode 100644
index 000000000..344202870
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -0,0 +1,1721 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "vi.h"
+#include "vid.h"
+
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "tonga_sdma_pkt_open.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/tonga_sdma.bin");
+MODULE_FIRMWARE("amdgpu/tonga_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin");
+MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/fiji_sdma.bin");
+MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/stoney_sdma.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
+
+
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+{
+ SDMA0_REGISTER_OFFSET,
+ SDMA1_REGISTER_OFFSET
+};
+
+static const u32 golden_settings_tonga_a11[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+};
+
+static const u32 tonga_mgcg_cgcg_init[] =
+{
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
+};
+
+static const u32 golden_settings_fiji_a10[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] =
+{
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
+};
+
+static const u32 golden_settings_polaris11_a11[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+};
+
+static const u32 golden_settings_polaris10_a11[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100,
+};
+
+static const u32 cz_golden_settings_a11[] =
+{
+ mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
+ mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
+ mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
+ mmSDMA1_GFX_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA1_POWER_CNTL, 0x00000800, 0x0003c800,
+ mmSDMA1_RLC0_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA1_RLC1_IB_CNTL, 0x00000100, 0x00000100,
+};
+
+static const u32 cz_mgcg_cgcg_init[] =
+{
+ mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
+ mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
+};
+
+static const u32 stoney_golden_settings_a11[] =
+{
+ mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
+ mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
+ mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
+};
+
+static const u32 stoney_mgcg_cgcg_init[] =
+{
+ mmSDMA0_CLK_CTRL, 0xffffffff, 0x00000100,
+};
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines. These engines are used for compute
+ * and gfx. There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has it's own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things. It also has support for tiling/detiling of
+ * buffers.
+ */
+
+static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_fiji_a10,
+ ARRAY_SIZE(golden_settings_fiji_a10));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_tonga_a11,
+ ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris11_a11,
+ ARRAY_SIZE(golden_settings_polaris11_a11));
+ break;
+ case CHIP_POLARIS10:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_polaris10_a11,
+ ARRAY_SIZE(golden_settings_polaris10_a11));
+ break;
+ case CHIP_CARRIZO:
+ amdgpu_device_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+ ARRAY_SIZE(cz_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ cz_golden_settings_a11,
+ ARRAY_SIZE(cz_golden_settings_a11));
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_program_register_sequence(adev,
+ stoney_mgcg_cgcg_init,
+ ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ stoney_golden_settings_a11,
+ ARRAY_SIZE(stoney_golden_settings_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+}
+
+/**
+ * sdma_v3_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0, i;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ chip_name = "tonga";
+ break;
+ case CHIP_FIJI:
+ chip_name = "fiji";
+ break;
+ case CHIP_POLARIS10:
+ chip_name = "polaris10";
+ break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
+ case CHIP_POLARIS12:
+ chip_name = "polaris12";
+ break;
+ case CHIP_VEGAM:
+ chip_name = "vegam";
+ break;
+ case CHIP_CARRIZO:
+ chip_name = "carrizo";
+ break;
+ case CHIP_STONEY:
+ chip_name = "stoney";
+ break;
+ default: BUG();
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ if (err)
+ goto out;
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma.instance[i].feature_version >= 20)
+ adev->sdma.instance[i].burst_nop = true;
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ }
+out:
+ if (err) {
+ pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name);
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ }
+ return err;
+}
+
+/**
+ * sdma_v3_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (VI+).
+ */
+static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ /* XXX check if swapping is necessary on BE */
+ return *ring->rptr_cpu_addr >> 2;
+}
+
+/**
+ * sdma_v3_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VI+).
+ */
+static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 wptr;
+
+ if (ring->use_doorbell || ring->use_pollmem) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = *ring->wptr_cpu_addr >> 2;
+ } else {
+ wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
+ }
+
+ return wptr;
+}
+
+/**
+ * sdma_v3_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VI+).
+ */
+static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, ring->wptr << 2);
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
+ } else if (ring->use_pollmem) {
+ u32 *wb = (u32 *)ring->wptr_cpu_addr;
+
+ WRITE_ONCE(*wb, ring->wptr << 2);
+ } else {
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
+ }
+}
+
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (VI).
+ */
+static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+}
+
+/**
+ * sdma_v3_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 ref_and_mask = 0;
+
+ if (ring->me == 0)
+ ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
+ else
+ ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (VI).
+ */
+static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+/**
+ * sdma_v3_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (VI).
+ */
+static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+ ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+}
+
+/**
+ * sdma_v3_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (VI).
+ */
+static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v3_0_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (VI).
+ */
+static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+ if (enable) {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 1);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ ATC_L1_ENABLE, 1);
+ if (amdgpu_sdma_phase_quantum) {
+ WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ }
+ } else {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 0);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ ATC_L1_ENABLE, 1);
+ }
+
+ WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+ }
+}
+
+/**
+ * sdma_v3_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (VI).
+ */
+static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v3_0_gfx_stop(adev);
+ sdma_v3_0_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
+ if (enable)
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
+ else
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
+ }
+}
+
+/**
+ * sdma_v3_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u64 wptr_gpu_addr;
+ int i, j, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (j = 0; j < 16; j++) {
+ vi_srbm_select(adev, 0, 0, 0, j);
+ /* SDMA GFX */
+ WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
+ }
+ vi_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
+ WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ ring->wptr = 0;
+ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
+ sdma_v3_0_ring_set_wptr(ring);
+ WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
+
+ doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]);
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL,
+ OFFSET, ring->doorbell_index);
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+
+ WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i],
+ lower_32_bits(wptr_gpu_addr));
+ WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
+ if (ring->use_pollmem) {
+ /*wptr polling is not enogh fast, directly clean the wptr register */
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ ENABLE, 1);
+ } else {
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ ENABLE, 0);
+ }
+ WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+
+ /* unhalt the MEs */
+ sdma_v3_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v3_0_ctx_switch_enable(adev, true);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v3_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+ return 0;
+}
+
+/**
+ * sdma_v3_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v3_0_start(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* disable sdma engine before programing it */
+ sdma_v3_0_ctx_switch_enable(adev, false);
+ sdma_v3_0_enable(adev, false);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v3_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v3_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * sdma_v3_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v3_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (VI).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v3_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+}
+
+/**
+ * sdma_v3_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (CIK).
+ */
+static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v3_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
+}
+
+/**
+ * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VI).
+ */
+static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for flush */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* reference */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static int sdma_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ adev->sdma.num_instances = 1;
+ break;
+ default:
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ break;
+ }
+
+ sdma_v3_0_set_ring_funcs(adev);
+ sdma_v3_0_set_buffer_funcs(adev);
+ sdma_v3_0_set_vm_pte_funcs(adev);
+ sdma_v3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ r = sdma_v3_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i];
+ } else {
+ ring->use_pollmem = true;
+ }
+
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v3_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ sdma_v3_0_free_microcode(adev);
+ return 0;
+}
+
+static int sdma_v3_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v3_0_init_golden_registers(adev);
+
+ r = sdma_v3_0_start(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int sdma_v3_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v3_0_ctx_switch_enable(adev, false);
+ sdma_v3_0_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v3_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v3_0_hw_fini(adev);
+}
+
+static int sdma_v3_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v3_0_hw_init(adev);
+}
+
+static bool sdma_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int sdma_v3_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK);
+
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static bool sdma_v3_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if ((tmp & SRBM_STATUS2__SDMA_BUSY_MASK) ||
+ (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)) {
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
+ }
+
+ if (srbm_soft_reset) {
+ adev->sdma.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->sdma.srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int sdma_v3_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+
+ if (!adev->sdma.srbm_soft_reset)
+ return 0;
+
+ srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
+ if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) ||
+ REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) {
+ sdma_v3_0_ctx_switch_enable(adev, false);
+ sdma_v3_0_enable(adev, false);
+ }
+
+ return 0;
+}
+
+static int sdma_v3_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+
+ if (!adev->sdma.srbm_soft_reset)
+ return 0;
+
+ srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
+ if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) ||
+ REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) {
+ sdma_v3_0_gfx_resume(adev);
+ sdma_v3_0_rlc_resume(adev);
+ }
+
+ return 0;
+}
+
+static int sdma_v3_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp;
+
+ if (!adev->sdma.srbm_soft_reset)
+ return 0;
+
+ srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int sdma_v3_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ switch (type) {
+ case AMDGPU_SDMA_IRQ_INSTANCE0:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AMDGPU_SDMA_IRQ_INSTANCE1:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (instance_id) {
+ case 0:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ case 1:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+
+ if (instance_id <= 1 && queue_id == 0)
+ drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
+ return 0;
+}
+
+static void sdma_v3_0_update_sdma_medium_grain_clock_gating(
+ struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+ int i;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]);
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (data != temp)
+ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data);
+ }
+ } else {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]);
+ data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK;
+
+ if (data != temp)
+ WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data);
+ }
+ }
+}
+
+static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
+ struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+ int i;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]);
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+
+ if (temp != data)
+ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data);
+ }
+ } else {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]);
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+
+ if (temp != data)
+ WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data);
+ }
+ }
+}
+
+static int sdma_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ sdma_v3_0_update_sdma_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ sdma_v3_0_update_sdma_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[0]);
+ if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[0]);
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
+ .name = "sdma_v3_0",
+ .early_init = sdma_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v3_0_sw_init,
+ .sw_fini = sdma_v3_0_sw_fini,
+ .hw_init = sdma_v3_0_hw_init,
+ .hw_fini = sdma_v3_0_hw_fini,
+ .suspend = sdma_v3_0_suspend,
+ .resume = sdma_v3_0_resume,
+ .is_idle = sdma_v3_0_is_idle,
+ .wait_for_idle = sdma_v3_0_wait_for_idle,
+ .check_soft_reset = sdma_v3_0_check_soft_reset,
+ .pre_soft_reset = sdma_v3_0_pre_soft_reset,
+ .post_soft_reset = sdma_v3_0_post_soft_reset,
+ .soft_reset = sdma_v3_0_soft_reset,
+ .set_clockgating_state = sdma_v3_0_set_clockgating_state,
+ .set_powergating_state = sdma_v3_0_set_powergating_state,
+ .get_clockgating_state = sdma_v3_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = false,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v3_0_ring_get_rptr,
+ .get_wptr = sdma_v3_0_ring_get_wptr,
+ .set_wptr = sdma_v3_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v3_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v3_0_ring_emit_pipeline_sync */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v3_0_ring_emit_vm_flush */
+ 10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */
+ .emit_ib = sdma_v3_0_ring_emit_ib,
+ .emit_fence = sdma_v3_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v3_0_ring_test_ring,
+ .test_ib = sdma_v3_0_ring_test_ib,
+ .insert_nop = sdma_v3_0_ring_insert_nop,
+ .pad_ib = sdma_v3_0_ring_pad_ib,
+ .emit_wreg = sdma_v3_0_ring_emit_wreg,
+};
+
+static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
+ .set = sdma_v3_0_set_trap_irq_state,
+ .process = sdma_v3_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = {
+ .process = sdma_v3_0_process_illegal_inst_irq,
+};
+
+static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: unused
+ *
+ * Copy GPU buffers using the DMA engine (VI).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (VI).
+ */
+static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
+ .copy_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v3_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
+};
+
+static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v3_0_vm_copy_pte,
+
+ .write_pte = sdma_v3_0_vm_write_pte,
+ .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
+};
+
+static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v3_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version sdma_v3_1_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &sdma_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h
new file mode 100644
index 000000000..7aa223d35
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V3_0_H__
+#define __SDMA_V3_0_H__
+
+extern const struct amdgpu_ip_block_version sdma_v3_0_ip_block;
+extern const struct amdgpu_ip_block_version sdma_v3_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
new file mode 100644
index 000000000..cd37f45e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -0,0 +1,2645 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "sdma0/sdma0_4_2_offset.h"
+#include "sdma0/sdma0_4_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_offset.h"
+#include "sdma1/sdma1_4_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
+#include "sdma0/sdma0_4_1_default.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "vega10_sdma_pkt_open.h"
+
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
+#include "amdgpu_ras.h"
+#include "sdma_v4_4.h"
+
+MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");
+MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
+MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+
+#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
+#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
+
+#define WREG32_SDMA(instance, offset, value) \
+ WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+ RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
+
+static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+
+static const struct soc15_reg_golden golden_settings_sdma_4[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_arct[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_UTCL1_TIMEOUT, 0xffffffff, 0x00010001)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_aldebaran[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA2_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+ SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_UTCL1_TIMEOUT, 0xffffffff, 0x00010001),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+};
+
+static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
+ u32 instance, u32 offset)
+{
+ switch (instance) {
+ case 0:
+ return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
+ case 1:
+ return (adev->reg_offset[SDMA1_HWIP][0][0] + offset);
+ case 2:
+ return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
+ case 3:
+ return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
+ case 4:
+ return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
+ case 5:
+ return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
+ case 6:
+ return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
+ case 7:
+ return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static unsigned sdma_v4_0_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3;
+ case 4:
+ return SOC15_IH_CLIENTID_SDMA4;
+ case 5:
+ return SOC15_IH_CLIENTID_SDMA5;
+ case 6:
+ return SOC15_IH_CLIENTID_SDMA6;
+ case 7:
+ return SOC15_IH_CLIENTID_SDMA7;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
+{
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ return 0;
+ case SOC15_IH_CLIENTID_SDMA1:
+ return 1;
+ case SOC15_IH_CLIENTID_SDMA2:
+ return 2;
+ case SOC15_IH_CLIENTID_SDMA3:
+ return 3;
+ case SOC15_IH_CLIENTID_SDMA4:
+ return 4;
+ case SOC15_IH_CLIENTID_SDMA5:
+ return 5;
+ case SOC15_IH_CLIENTID_SDMA6:
+ return 6;
+ case SOC15_IH_CLIENTID_SDMA7:
+ return 7;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg10,
+ ARRAY_SIZE(golden_settings_sdma_vg10));
+ break;
+ case IP_VERSION(4, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg12,
+ ARRAY_SIZE(golden_settings_sdma_vg12));
+ break;
+ case IP_VERSION(4, 2, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma0_4_2_init,
+ ARRAY_SIZE(golden_settings_sdma0_4_2_init));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma0_4_2,
+ ARRAY_SIZE(golden_settings_sdma0_4_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma1_4_2,
+ ARRAY_SIZE(golden_settings_sdma1_4_2));
+ break;
+ case IP_VERSION(4, 2, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_arct,
+ ARRAY_SIZE(golden_settings_sdma_arct));
+ break;
+ case IP_VERSION(4, 4, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_aldebaran,
+ ARRAY_SIZE(golden_settings_sdma_aldebaran));
+ break;
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4_1,
+ ARRAY_SIZE(golden_settings_sdma_4_1));
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv2,
+ ARRAY_SIZE(golden_settings_sdma_rv2));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv1,
+ ARRAY_SIZE(golden_settings_sdma_rv1));
+ break;
+ case IP_VERSION(4, 1, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4_3,
+ ARRAY_SIZE(golden_settings_sdma_4_3));
+ break;
+ default:
+ break;
+ }
+}
+
+static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
+{
+ int i;
+
+ /*
+ * The only chips with SDMAv4 and ULV are VG10 and VG20.
+ * Server SKUs take a different hysteresis setting from other SKUs.
+ */
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ if (adev->pdev->device == 0x6860)
+ break;
+ return;
+ case IP_VERSION(4, 2, 0):
+ if (adev->pdev->device == 0x66a1)
+ break;
+ return;
+ default:
+ return;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ uint32_t temp;
+
+ temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0);
+ WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp);
+ }
+}
+
+/**
+ * sdma_v4_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+
+// emulation only, won't work on real chip
+// vega10 real chip need to use PSP to load firmware
+static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
+ adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) {
+ /* Acturus & Aldebaran will leverage the same FW memory
+ for every SDMA instance */
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * sdma_v4_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (VEGA10+).
+ */
+static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = ((u64 *)ring->rptr_cpu_addr);
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v4_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VEGA10+).
+ */
+static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+ ring->me, wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VEGA10+).
+ */
+static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
+
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/**
+ * sdma_v4_0_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VEGA10+).
+ */
+static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ } else {
+ wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VEGA10+).
+ */
+static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)ring->wptr_cpu_addr;
+
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ uint64_t wptr = ring->wptr << 2;
+
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
+ lower_32_bits(wptr));
+ WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
+ upper_32_bits(wptr));
+ }
+}
+
+static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (VEGA10).
+ */
+static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+}
+
+static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
+ int mem_space, int hdp,
+ uint32_t addr0, uint32_t addr1,
+ uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ if (mem_space) {
+ /* memory */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ } else {
+ /* registers */
+ amdgpu_ring_write(ring, addr0 << 2);
+ amdgpu_ring_write(ring, addr1 << 2);
+ }
+ amdgpu_ring_write(ring, ref); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ sdma_v4_0_wait_reg_mem(ring, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ ref_and_mask, ref_and_mask, 10);
+}
+
+/**
+ * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (VEGA10).
+ */
+static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+
+/**
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable SDMA RB/IB
+ * control the gfx async dma ring buffers (VEGA10).
+ */
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (VEGA10).
+ */
+static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v4_0_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the page async dma ring buffers (VEGA10).
+ */
+static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+ RB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
+ IB_ENABLE, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_0_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (VEGA10).
+ */
+static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
+ }
+ WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
+
+ /*
+ * Enable SDMA utilization. Its only supported on
+ * Arcturus for the moment and firmware version 14
+ * and above.
+ */
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) &&
+ adev->sdma.instance[i].fw_version >= 14)
+ WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
+ }
+
+}
+
+/**
+ * sdma_v4_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (VEGA10).
+ */
+static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v4_0_gfx_enable(adev, enable);
+ sdma_v4_0_rlc_stop(adev);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_0_page_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
+ }
+}
+
+/*
+ * sdma_v4_0_rb_cntl - get parameters for rb_cntl
+ */
+static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+ /* Set ring buffer size in dwords */
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ return rb_cntl;
+}
+
+/**
+ * sdma_v4_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the gfx DMA ring buffers and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
+
+ sdma_v4_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
+}
+
+/**
+ * sdma_v4_0_page_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the page DMA ring buffers and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+
+ rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+ rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA0_PAGE_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
+ WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
+
+ /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */
+ sdma_v4_0_page_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_PAGE_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+}
+
+static void
+sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
+ /* enable idle interrupt */
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+ data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+
+ if (data != def)
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+ } else {
+ /* disable idle interrupt */
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+ data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ if (data != def)
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+ }
+}
+
+static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ /* Enable HW based PG. */
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+ data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
+ if (data != def)
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+
+ /* enable interrupt */
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+ data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ if (data != def)
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+
+ /* Configure hold time to filter in-valid power on/off request. Use default right now */
+ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+ data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
+ data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
+ /* Configure switch time for hysteresis purpose. Use default right now */
+ data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
+ data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
+ if(data != def)
+ WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+}
+
+static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
+{
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
+ return;
+
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ sdma_v4_1_init_power_gating(adev);
+ sdma_v4_1_update_power_gating(adev, true);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * sdma_v4_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
+{
+ sdma_v4_0_init_pg(adev);
+
+ return 0;
+}
+
+/**
+ * sdma_v4_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v4_0_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
+
+ for (j = 0; j < fw_size; j++)
+ WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v4_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_0_ctx_switch_enable(adev, false);
+ sdma_v4_0_enable(adev, false);
+ } else {
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ r = sdma_v4_0_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v4_0_ctx_switch_enable(adev, true);
+ }
+
+ /* start the gfx rings and rlc compute queues */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ uint32_t temp;
+
+ WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+ sdma_v4_0_gfx_resume(adev, i);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_0_page_resume(adev, i);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32_SDMA(i, mmSDMA0_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+ WREG32_SDMA(i, mmSDMA0_CNTL, temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_0_ctx_switch_enable(adev, true);
+ sdma_v4_0_enable(adev, true);
+ } else {
+ r = sdma_v4_0_rlc_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+ r = amdgpu_ring_test_helper(page);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == page)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return r;
+}
+
+/**
+ * sdma_v4_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (VEGA10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v4_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (VEGA10).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (VEGA10).
+ */
+static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v4_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (VEGA10).
+ */
+static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (VEGA10).
+ */
+static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ */
+static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ sdma_v4_0_wait_reg_mem(ring, 1, 0,
+ addr & 0xfffffffc,
+ upper_32_bits(addr) & 0xffffffff,
+ seq, 0xffffffff, 4);
+}
+
+
+/**
+ * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VEGA10).
+ */
+static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
+}
+
+static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
+{
+ uint fw_version = adev->sdma.instance[0].fw_version;
+
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ return fw_version >= 430;
+ case IP_VERSION(4, 0, 1):
+ /*return fw_version >= 31;*/
+ return false;
+ case IP_VERSION(4, 2, 0):
+ return fw_version >= 123;
+ default:
+ return false;
+ }
+}
+
+static int sdma_v4_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = sdma_v4_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ /* TODO: Page queue breaks driver reload under SRIOV */
+ if ((adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 0, 0)) &&
+ amdgpu_sriov_vf((adev)))
+ adev->sdma.has_page_queue = false;
+ else if (sdma_v4_0_fw_support_paging_queue(adev))
+ adev->sdma.has_page_queue = true;
+
+ sdma_v4_0_set_ring_funcs(adev);
+ sdma_v4_0_set_buffer_funcs(adev);
+ sdma_v4_0_set_vm_pte_funcs(adev);
+ sdma_v4_0_set_irq_funcs(adev);
+ sdma_v4_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+
+static int sdma_v4_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v4_0_setup_ulv(adev);
+
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+ adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
+ adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ }
+
+ return 0;
+}
+
+static int sdma_v4_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA SRAM ECC event */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ /* doorbell size is 2 dwords, get DWORD offset */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+
+ /*
+ * On Arcturus, SDMA instance 5~7 has a different vmhub
+ * type(AMDGPU_MMHUB1).
+ */
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ ring = &adev->sdma.instance[i].page;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ /* paging queue use same doorbell index/routing as gfx queue
+ * with 0x400 (4096 dwords) offset on second doorbell page
+ */
+ if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
+ adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) {
+ ring->doorbell_index =
+ adev->doorbell_index.sdma_engine[i] << 1;
+ ring->doorbell_index += 0x400;
+ } else {
+ /* From vega20, the sdma_doorbell_range in 1st
+ * doorbell page is reserved for page queue.
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ }
+
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "page%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int sdma_v4_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.has_page_queue)
+ amdgpu_ring_fini(&adev->sdma.instance[i].page);
+ }
+
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
+ adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ return 0;
+}
+
+static int sdma_v4_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_0_init_golden_registers(adev);
+
+ return sdma_v4_0_start(adev);
+}
+
+static int sdma_v4_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ return 0;
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_0_ctx_switch_enable(adev, false);
+ sdma_v4_0_enable(adev, false);
+
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+
+ return 0;
+}
+
+static int sdma_v4_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SMU saves SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_gfx_enable(adev, false);
+ return 0;
+ }
+
+ return sdma_v4_0_hw_fini(adev);
+}
+
+static int sdma_v4_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SMU restores SDMA state for us */
+ if (adev->in_s0ix) {
+ sdma_v4_0_enable(adev, true);
+ sdma_v4_0_gfx_enable(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ return 0;
+ }
+
+ return sdma_v4_0_hw_init(adev);
+}
+
+static bool sdma_v4_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v4_0_wait_for_idle(void *handle)
+{
+ unsigned i, j;
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ for (j = 0; j < adev->sdma.num_instances; j++) {
+ sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG);
+ if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK))
+ break;
+ }
+ if (j == adev->sdma.num_instances)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v4_0_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ sdma_cntl = RREG32_SDMA(type, mmSDMA0_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, mmSDMA0_CNTL, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t instance;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instance].ring);
+ break;
+ case 1:
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0))
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ if (adev->ip_versions[SDMA0_HWIP][0] != IP_VERSION(4, 2, 0))
+ amdgpu_fence_process(&adev->sdma.instance[instance].page);
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ goto out;
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ goto out;
+
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+
+out:
+ return AMDGPU_RAS_SUCCESS;
+}
+
+static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return 0;
+
+ switch (entry->ring_id) {
+ case 0:
+ drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_edc_config;
+
+ sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);
+ sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);
+
+ return 0;
+}
+
+static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+ struct amdgpu_task_info task_info;
+ u64 addr;
+
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0 || instance >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_dbg_ratelimited(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
+ "pasid:%u, for process %s pid %d thread %s pid %d\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ return 0;
+}
+
+static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static void sdma_v4_0_update_medium_grain_clock_gating(
+ struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
+ }
+ } else {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL);
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);
+ }
+ }
+}
+
+
+static void sdma_v4_0_update_medium_grain_light_sleep(
+ struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
+ }
+ } else {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL);
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);
+ }
+ }
+}
+
+static int sdma_v4_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ sdma_v4_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ sdma_v4_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static int sdma_v4_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ sdma_v4_1_update_power_gating(adev,
+ state == AMD_PG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
+ .name = "sdma_v4_0",
+ .early_init = sdma_v4_0_early_init,
+ .late_init = sdma_v4_0_late_init,
+ .sw_init = sdma_v4_0_sw_init,
+ .sw_fini = sdma_v4_0_sw_fini,
+ .hw_init = sdma_v4_0_hw_init,
+ .hw_fini = sdma_v4_0_hw_fini,
+ .suspend = sdma_v4_0_suspend,
+ .resume = sdma_v4_0_resume,
+ .is_idle = sdma_v4_0_is_idle,
+ .wait_for_idle = sdma_v4_0_wait_for_idle,
+ .soft_reset = sdma_v4_0_soft_reset,
+ .set_clockgating_state = sdma_v4_0_set_clockgating_state,
+ .set_powergating_state = sdma_v4_0_set_powergating_state,
+ .get_clockgating_state = sdma_v4_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_ring_get_wptr,
+ .set_wptr = sdma_v4_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v4_0_ring_get_rptr,
+ .get_wptr = sdma_v4_0_page_ring_get_wptr,
+ .set_wptr = sdma_v4_0_page_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_0_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+ .emit_ib = sdma_v4_0_ring_emit_ib,
+ .emit_fence = sdma_v4_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_0_ring_test_ring,
+ .test_ib = sdma_v4_0_ring_test_ib,
+ .insert_nop = sdma_v4_0_ring_insert_nop,
+ .pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ if (adev->sdma.has_page_queue) {
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_0_page_ring_funcs;
+ adev->sdma.instance[i].page.me = i;
+ }
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
+ .set = sdma_v4_0_set_trap_irq_state,
+ .process = sdma_v4_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
+ .process = sdma_v4_0_process_illegal_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
+ .set = sdma_v4_0_set_ecc_irq_state,
+ .process = amdgpu_sdma_process_ecc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_vm_hole_irq_funcs = {
+ .process = sdma_v4_0_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_doorbell_invalid_irq_funcs = {
+ .process = sdma_v4_0_process_doorbell_invalid_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_pool_timeout_irq_funcs = {
+ .process = sdma_v4_0_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
+ .process = sdma_v4_0_process_srbm_write_irq,
+};
+
+static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+ /*For Arcturus and Aldebaran, add another 4 irq handler*/
+ switch (adev->sdma.num_instances) {
+ case 5:
+ case 8:
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+ break;
+ default:
+ break;
+ }
+ adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
+ adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_0_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_0_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_0_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_0_srbm_write_irq_funcs;
+}
+
+/**
+ * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine (VEGA10/12).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (VEGA10/12).
+ */
+static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v4_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v4_0_emit_fill_buffer,
+};
+
+static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
+ if (adev->sdma.has_page_queue)
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+ else
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v4_0_vm_copy_pte,
+
+ .write_pte = sdma_v4_0_vm_write_pte,
+ .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
+};
+
+static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ struct drm_gpu_scheduler *sched;
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
+ sched = &adev->sdma.instance[i].page.sched;
+ else
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_0_ras_fields[i].sec_count_mask) >>
+ sdma_v4_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_0_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
+ }
+ }
+}
+
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+
+ reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_0_get_ras_error_count(reg_value,
+ instance, &sec_count);
+ /* err_data->ce_count should be initialized to 0
+ * before calling into this function */
+ err_data->ce_count += sec_count;
+ /* double bit error is not supported
+ * set ue count to 0 */
+ err_data->ue_count = 0;
+
+ return 0;
+};
+
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ int i = 0;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
+ dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
+ return;
+ }
+ }
+}
+
+static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* read back edc counter registers to clear the counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
+ }
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_0_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
+};
+
+static struct amdgpu_sdma_ras sdma_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_0_ras_hw_ops,
+ .ras_cb = sdma_v4_0_process_ras_data_cb,
+ },
+};
+
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 2):
+ adev->sdma.ras = &sdma_v4_0_ras;
+ break;
+ case IP_VERSION(4, 4, 0):
+ adev->sdma.ras = &sdma_v4_4_ras;
+ break;
+ default:
+ break;
+ }
+
+}
+
+const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h
new file mode 100644
index 000000000..5c5a7479a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V4_0_H__
+#define __SDMA_V4_0_H__
+
+extern const struct amd_ip_funcs sdma_v4_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
new file mode 100644
index 000000000..0ddb6955a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "sdma/sdma_4_4_0_offset.h"
+#include "sdma/sdma_4_4_0_sh_mask.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA2_REG_OFFSET 0x1cda0
+#define SDMA3_REG_OFFSET 0x1d1a0
+#define SDMA4_REG_OFFSET 0x1d5a0
+
+/* helper function that allow only use sdma0 register offset
+ * to calculate register offset for all the sdma instances */
+static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
+ uint32_t instance,
+ uint32_t offset)
+{
+ uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
+
+ switch (instance) {
+ case 0:
+ return (sdma_base + offset);
+ case 1:
+ return (sdma_base + SDMA1_REG_OFFSET + offset);
+ case 2:
+ return (sdma_base + SDMA2_REG_OFFSET + offset);
+ case 3:
+ return (sdma_base + SDMA3_REG_OFFSET + offset);
+ case 4:
+ return (sdma_base + SDMA4_REG_OFFSET + offset);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
+};
+
+static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
+ uint32_t reg_offset,
+ uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
+ if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
+ continue;
+
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_4_ras_fields[i].sec_count_mask) >>
+ sdma_v4_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_4_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
+ }
+ }
+}
+
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
+ uint32_t instance,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+ uint32_t reg_offset = 0;
+
+ reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
+ reg_value = RREG32(reg_offset);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
+ instance, &sec_count);
+
+ reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
+ reg_value = RREG32(reg_offset);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
+ instance, &sec_count);
+
+ /*
+ * err_data->ue_count should be initialized to 0
+ * before calling into this function
+ *
+ * SDMA RAS supports single bit uncorrectable error detection.
+ * So, increment uncorrectable error count.
+ */
+ err_data->ue_count += sec_count;
+
+ /*
+ * SDMA RAS does not support correctable errors.
+ * Set ce count to 0.
+ */
+ err_data->ce_count = 0;
+
+ return 0;
+};
+
+static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_offset;
+
+ /* write 0 to EDC_COUNTER reg to clear sdma edc counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
+ WREG32(reg_offset, 0);
+ reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
+ WREG32(reg_offset, 0);
+ }
+ }
+}
+
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ int i = 0;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
+ dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
+ return;
+ }
+ }
+
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_4_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
+};
+
+struct amdgpu_sdma_ras sdma_v4_4_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_ras_hw_ops,
+ },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
new file mode 100644
index 000000000..a9f0c6835
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V4_4_H__
+#define __SDMA_V4_4_H__
+
+extern struct amdgpu_sdma_ras sdma_v4_4_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
new file mode 100644
index 000000000..f413898dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -0,0 +1,2200 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "vega10_sdma_pkt_open.h"
+
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
+#include "amdgpu_ras.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+
+#define WREG32_SDMA(instance, offset, value) \
+ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+ RREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)))
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+
+static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
+ u32 instance, u32 offset)
+{
+ u32 dev_inst = GET_INST(SDMA0, instance);
+
+ return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
+}
+
+static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+{
+ switch (client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ return 0;
+ case SOC15_IH_CLIENTID_SDMA1:
+ return 1;
+ case SOC15_IH_CLIENTID_SDMA2:
+ return 2;
+ case SOC15_IH_CLIENTID_SDMA3:
+ return 3;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);
+
+ val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
+ 4);
+ val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
+ PIPE_INTERLEAVE_SIZE, 0);
+ WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
+ }
+}
+
+/**
+ * sdma_v4_4_2_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) {
+ ret = amdgpu_sdma_init_microcode(adev, 0, true);
+ break;
+ } else {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * sdma_v4_4_2_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v4_4_2_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR);
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
+ ring->me, wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_4_2_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v4_4_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR,
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI,
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/**
+ * sdma_v4_4_2_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v4_4_2_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ } else {
+ wptr = RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI);
+ wptr = wptr << 32;
+ wptr |= RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v4_4_2_page_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+ /* XXX check if swapping is necessary on BE */
+ WRITE_ONCE(*wb, (ring->wptr << 2));
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ uint64_t wptr = ring->wptr << 2;
+
+ WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR,
+ lower_32_bits(wptr));
+ WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI,
+ upper_32_bits(wptr));
+ }
+}
+
+static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v4_4_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+
+}
+
+static void sdma_v4_4_2_wait_reg_mem(struct amdgpu_ring *ring,
+ int mem_space, int hdp,
+ uint32_t addr0, uint32_t addr1,
+ uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ if (mem_space) {
+ /* memory */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ } else {
+ /* registers */
+ amdgpu_ring_write(ring, addr0 << 2);
+ amdgpu_ring_write(ring, addr1 << 2);
+ }
+ amdgpu_ring_write(ring, ref); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ ref_and_mask, ref_and_mask, 10);
+}
+
+/**
+ * sdma_v4_4_2_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+}
+
+
+/**
+ * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 rb_cntl, ib_cntl;
+ int i, unset = 0;
+
+ for_each_inst(i, inst_mask) {
+ sdma[i] = &adev->sdma.instance[i].ring;
+
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = 1;
+ }
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Stop the page async dma ring buffers.
+ */
+static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ u32 rb_cntl, ib_cntl;
+ int i;
+ bool unset = false;
+
+ for_each_inst(i, inst_mask) {
+ sdma[i] = &adev->sdma.instance[i].page;
+
+ if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
+ (!unset)) {
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ unset = true;
+ }
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL,
+ IB_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt or unhalt the async dma engines context switch.
+ */
+static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SDMA(i, regSDMA_PHASE0_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE1_QUANTUM, phase_quantum);
+ WREG32_SDMA(i, regSDMA_PHASE2_QUANTUM, phase_quantum);
+ }
+ WREG32_SDMA(i, regSDMA_CNTL, f32_cntl);
+
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
+ }
+}
+
+/**
+ * sdma_v4_4_2_inst_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
+ uint32_t inst_mask)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ /* SDMA FW needs to respond to FREEZE requests during reset.
+ * Keep it running during reset */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ return;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ return;
+
+ for_each_inst(i, inst_mask) {
+ f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
+ }
+}
+
+/*
+ * sdma_v4_4_2_rb_cntl - get parameters for rb_cntl
+ */
+static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+ /* Set ring buffer size in dwords */
+ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+ barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ return rb_cntl;
+}
+
+/**
+ * sdma_v4_4_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+
+ sdma_v4_4_2_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+}
+
+/**
+ * sdma_v4_4_2_page_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the page DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
+{
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+ u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u64 wptr_gpu_addr;
+
+ wb_offset = (ring->rptr_offs * 4);
+
+ rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
+ rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_LO,
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
+ RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
+
+ doorbell = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_PAGE_DOORBELL, ENABLE,
+ ring->use_doorbell);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_PAGE_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET, doorbell_offset);
+
+ /* paging queue doorbell range is setup at sdma_v4_4_2_gfx_resume */
+ sdma_v4_4_2_page_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL);
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA_PAGE_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+
+ ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
+}
+
+static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
+{
+
+}
+
+/**
+ * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ sdma_v4_4_2_init_pg(adev);
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ for_each_inst(i, inst_mask) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR, 0);
+
+ for (j = 0; j < fw_size; j++)
+ WREG32_SDMA(i, regSDMA_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SDMA(i, regSDMA_UCODE_ADDR,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v4_4_2_inst_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ struct amdgpu_ring *ring;
+ uint32_t tmp_mask;
+ int i, r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+ } else {
+ /* bypass sdma microcode loading on Gopher */
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.instance[0].fw) {
+ r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ /* enable sdma ring preemption */
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ }
+
+ /* start the gfx rings and rlc compute queues */
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ uint32_t temp;
+
+ WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+ sdma_v4_4_2_gfx_resume(adev, i);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_page_resume(adev, i);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32_SDMA(i, regSDMA_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ /* unhalt engine */
+ temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0);
+ WREG32_SDMA(i, regSDMA_F32_CNTL, temp);
+ }
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, true, inst_mask);
+ } else {
+ r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
+ if (r)
+ return r;
+ }
+
+ tmp_mask = inst_mask;
+ for_each_inst(i, tmp_mask) {
+ ring = &adev->sdma.instance[i].ring;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+ r = amdgpu_ring_test_helper(page);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == page)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return r;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v4_4_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * sdma_v4_4_2_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v4_4_2_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v4_4_2_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v4_4_2_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v4_4_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v4_4_2_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v4_4_2_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ */
+static void sdma_v4_4_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ sdma_v4_4_2_wait_reg_mem(ring, 1, 0,
+ addr & 0xfffffffc,
+ upper_32_bits(addr) & 0xffffffff,
+ seq, 0xffffffff, 4);
+}
+
+
+/**
+ * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ sdma_v4_4_2_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
+}
+
+static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(4, 4, 2):
+ return false;
+ default:
+ return false;
+ }
+}
+
+static int sdma_v4_4_2_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = sdma_v4_4_2_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ /* TODO: Page queue breaks driver reload under SRIOV */
+ if (sdma_v4_4_2_fw_support_paging_queue(adev))
+ adev->sdma.has_page_queue = true;
+
+ sdma_v4_4_2_set_ring_funcs(adev);
+ sdma_v4_4_2_set_buffer_funcs(adev);
+ sdma_v4_4_2_set_vm_pte_funcs(adev);
+ sdma_v4_4_2_set_irq_funcs(adev);
+ sdma_v4_4_2_set_ras_funcs(adev);
+
+ return 0;
+}
+
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+#endif
+
+static int sdma_v4_4_2_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+#if 0
+ struct ras_ih_if ih_info = {
+ .cb = sdma_v4_4_2_process_ras_data_cb,
+ };
+#endif
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+ adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
+ adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ }
+
+ return 0;
+}
+
+static int sdma_v4_4_2_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 aid_id;
+
+ /* SDMA trap event */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA SRAM ECC event */
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
+ &adev->sdma.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ aid_id = adev->sdma.instance[i].aid_id;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ /* doorbell size is 2 dwords, get DWORD offset */
+ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ sprintf(ring->name, "sdma%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ if (adev->sdma.has_page_queue) {
+ ring = &adev->sdma.instance[i].page;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ /* doorbell index of page queue is assigned right after
+ * gfx queue on the same instance
+ */
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] + 1) << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(aid_id);
+
+ sprintf(ring->name, "page%d.%d", aid_id,
+ i % adev->sdma.num_inst_per_aid);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "fail to initialize sdma ras block\n");
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int sdma_v4_4_2_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.has_page_queue)
+ amdgpu_ring_fini(&adev->sdma.instance[i].page);
+ }
+
+ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2))
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ else
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask);
+
+ return r;
+}
+
+static int sdma_v4_4_2_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
+ int i;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+
+static int sdma_v4_4_2_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_in_reset(adev))
+ sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+
+ return sdma_v4_4_2_hw_fini(adev);
+}
+
+static int sdma_v4_4_2_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v4_4_2_hw_init(adev);
+}
+
+static bool sdma_v4_4_2_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32_SDMA(i, regSDMA_STATUS_REG);
+
+ if (!(tmp & SDMA_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v4_4_2_wait_for_idle(void *handle)
+{
+ unsigned i, j;
+ u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ for (j = 0; j < adev->sdma.num_instances; j++) {
+ sdma[j] = RREG32_SDMA(j, regSDMA_STATUS_REG);
+ if (!(sdma[j] & SDMA_STATUS_REG__IDLE_MASK))
+ break;
+ }
+ if (j == adev->sdma.num_instances)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v4_4_2_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t instance, i;
+
+ DRM_DEBUG("IH: SDMA trap\n");
+ instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+
+ /* Client id gives the SDMA instance in AID. To know the exact SDMA
+ * instance, interrupt entry gives the node id which corresponds to the AID instance.
+ * Match node id with the AID id associated with the SDMA instance. */
+ for (i = instance; i < adev->sdma.num_instances;
+ i += adev->sdma.num_inst_per_aid) {
+ if (adev->sdma.instance[i].aid_id ==
+ node_id_to_phys_map[entry->node_id])
+ break;
+ }
+
+ if (i >= adev->sdma.num_instances) {
+ dev_WARN_ONCE(
+ adev->dev, 1,
+ "Couldn't find the right sdma instance in trap handler");
+ return 0;
+ }
+
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[i].ring);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#if 0
+static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
+ goto out;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ goto out;
+
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+
+out:
+ return AMDGPU_RAS_SUCCESS;
+}
+#endif
+
+static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+
+ instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return 0;
+
+ switch (entry->ring_id) {
+ case 0:
+ drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
+ DRAM_ECC_INT_ENABLE, 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
+ break;
+ /* sdma ecc interrupt is enabled by default
+ * driver doesn't need to do anything to
+ * enable the interrupt */
+ case AMDGPU_IRQ_STATE_ENABLE:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ int instance;
+ struct amdgpu_task_info task_info;
+ u64 addr;
+
+ instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ if (instance < 0 || instance >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_dbg_ratelimited(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
+ "pasid:%u, for process %s pid %d thread %s pid %d\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_pool_timeout_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_dbg_ratelimited(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ /* 1-not override: enable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ /* 0-override:disable sdma mem light sleep */
+ def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+ data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+ }
+ }
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+ uint32_t data, def;
+ int i;
+
+ /* leave as default if it is not driver controlled */
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
+ return;
+
+ if (enable) {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
+ data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ } else {
+ for_each_inst(i, inst_mask) {
+ def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
+ data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
+ }
+ }
+}
+
+static int sdma_v4_4_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t inst_mask;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+
+ sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+ adev, state == AMD_CG_STATE_GATE, inst_mask);
+ return 0;
+}
+
+static int sdma_v4_4_2_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
+ if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
+ if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
+ .name = "sdma_v4_4_2",
+ .early_init = sdma_v4_4_2_early_init,
+ .late_init = sdma_v4_4_2_late_init,
+ .sw_init = sdma_v4_4_2_sw_init,
+ .sw_fini = sdma_v4_4_2_sw_fini,
+ .hw_init = sdma_v4_4_2_hw_init,
+ .hw_fini = sdma_v4_4_2_hw_fini,
+ .suspend = sdma_v4_4_2_suspend,
+ .resume = sdma_v4_4_2_resume,
+ .is_idle = sdma_v4_4_2_is_idle,
+ .wait_for_idle = sdma_v4_4_2_wait_for_idle,
+ .soft_reset = sdma_v4_4_2_soft_reset,
+ .set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
+ .set_powergating_state = sdma_v4_4_2_set_powergating_state,
+ .get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_ring_get_wptr,
+ .set_wptr = sdma_v4_4_2_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xff,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .get_rptr = sdma_v4_4_2_ring_get_rptr,
+ .get_wptr = sdma_v4_4_2_page_ring_get_wptr,
+ .set_wptr = sdma_v4_4_2_page_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
+ /* sdma_v4_4_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
+ .emit_ib = sdma_v4_4_2_ring_emit_ib,
+ .emit_fence = sdma_v4_4_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v4_4_2_ring_test_ring,
+ .test_ib = sdma_v4_4_2_ring_test_ib,
+ .insert_nop = sdma_v4_4_2_ring_insert_nop,
+ .pad_ib = sdma_v4_4_2_ring_pad_ib,
+ .emit_wreg = sdma_v4_4_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, dev_inst;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ if (adev->sdma.has_page_queue) {
+ adev->sdma.instance[i].page.funcs =
+ &sdma_v4_4_2_page_ring_funcs;
+ adev->sdma.instance[i].page.me = i;
+ }
+
+ dev_inst = GET_INST(SDMA0, i);
+ /* AID to which SDMA belongs depends on physical instance */
+ adev->sdma.instance[i].aid_id =
+ dev_inst / adev->sdma.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_trap_irq_funcs = {
+ .set = sdma_v4_4_2_set_trap_irq_state,
+ .process = sdma_v4_4_2_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_illegal_inst_irq_funcs = {
+ .process = sdma_v4_4_2_process_illegal_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ecc_irq_funcs = {
+ .set = sdma_v4_4_2_set_ecc_irq_state,
+ .process = amdgpu_sdma_process_ecc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_vm_hole_irq_funcs = {
+ .process = sdma_v4_4_2_process_vm_hole_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_doorbell_invalid_irq_funcs = {
+ .process = sdma_v4_4_2_process_doorbell_invalid_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_pool_timeout_irq_funcs = {
+ .process = sdma_v4_4_2_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
+ .process = sdma_v4_4_2_process_srbm_write_irq,
+};
+
+static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+
+ adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
+ adev->sdma.ecc_irq.funcs = &sdma_v4_4_2_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_4_2_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+}
+
+/**
+ * sdma_v4_4_2_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v4_4_2_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
+};
+
+static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v4_4_2_buffer_funcs;
+ if (adev->sdma.has_page_queue)
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
+ else
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v4_4_2_vm_copy_pte,
+
+ .write_pte = sdma_v4_4_2_vm_write_pte,
+ .set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
+};
+
+static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ struct drm_gpu_scheduler *sched;
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.has_page_queue)
+ sched = &adev->sdma.instance[i].page.sched;
+ else
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 4,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &sdma_v4_4_2_ip_funcs,
+};
+
+static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ if (!amdgpu_sriov_vf(adev))
+ sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
+
+ r = sdma_v4_4_2_inst_start(adev, inst_mask);
+
+ return r;
+}
+
+static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp_mask = inst_mask;
+ int i;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, tmp_mask) {
+ amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ }
+ }
+
+ sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+ sdma_v4_4_2_inst_enable(adev, false, inst_mask);
+
+ return 0;
+}
+
+struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = {
+ .suspend = &sdma_v4_4_2_xcp_suspend,
+ .resume = &sdma_v4_4_2_xcp_resume
+};
+
+static const struct amdgpu_ras_err_status_reg_entry sdma_v4_2_2_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"},
+};
+
+static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = {
+ {AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"},
+ {AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"},
+ {AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"},
+ {AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"},
+ {AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"},
+ {AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"},
+ {AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"},
+ {AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"},
+ {AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"},
+ {AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"},
+};
+
+static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
+ /* sdma v4_4_2 doesn't support query ce counts */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_v4_4_2_ras_memory_list,
+ ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
+ sdma_dev_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t sdma_inst)
+{
+ uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ sdma_v4_2_2_ue_reg_list,
+ ARRAY_SIZE(sdma_v4_2_2_ue_reg_list),
+ sdma_dev_inst);
+}
+
+static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ int i = 0;
+
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for_each_inst(i, inst_mask)
+ sdma_v4_4_2_inst_reset_ras_error_count(adev, i);
+ } else {
+ dev_warn(adev->dev, "SDMA RAS is not supported\n");
+ }
+}
+
+static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
+ .query_ras_error_count = sdma_v4_4_2_query_ras_error_count,
+ .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
+};
+
+static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
+ .ras_block = {
+ .hw_ops = &sdma_v4_4_2_ras_hw_ops,
+ },
+};
+
+static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.ras = &sdma_v4_4_2_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
new file mode 100644
index 000000000..d51614552
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V4_4_2_H__
+#define __SDMA_V4_4_2_H__
+
+extern const struct amd_ip_funcs sdma_v4_4_2_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
new file mode 100644
index 000000000..1cc34efb4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -0,0 +1,1920 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "navi10_sdma_pkt_open.h"
+#include "nbio_v2_3.h"
+#include "sdma_common.h"
+#include "sdma_v5_0.h"
+
+MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
+
+MODULE_FIRMWARE("amdgpu/navi14_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x5893
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const struct soc15_reg_golden golden_settings_sdma_5[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_5_sriov[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00)
+};
+
+static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance == 1)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ }
+
+ return base + internal_offset;
+}
+
+static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(5, 0, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
+ break;
+ case IP_VERSION(5, 0, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv14));
+ break;
+ case IP_VERSION(5, 0, 5):
+ if (amdgpu_sriov_vf(adev))
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5_sriov,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5_sriov));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv12));
+ break;
+ case IP_VERSION(5, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_cyan_skillfish,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish));
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * sdma_v5_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+
+// emulation only, won't work on real chip
+// navi10 real chip need to use PSP to load firmware
+static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ret = amdgpu_sdma_init_microcode(adev, i, false);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
+ amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+
+ return ret;
+}
+
+static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur > offset)
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+/**
+ * sdma_v5_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v5_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
+ wptr = wptr << 32;
+ wptr |= RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v5_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (NAVI10+).
+ */
+static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ *wptr_saved = ring->wptr << 2;
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index,
+ ring->wptr << 2);
+ }
+ } else {
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+ }
+}
+
+static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+/**
+ * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->me == 0)
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
+ else
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ uint32_t ctx = ring->is_mes_queue ?
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ }
+}
+
+
+/**
+ * sdma_v5_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (NAVI10).
+ */
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ }
+}
+
+/**
+ * sdma_v5_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (NAVI10).
+ */
+static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v5_0_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (NAVI10).
+ */
+static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl = 0, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ }
+
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ phase_quantum);
+ }
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
+
+}
+
+/**
+ * sdma_v5_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (NAVI10).
+ */
+static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v5_0_gfx_stop(adev);
+ sdma_v5_0_rlc_stop(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
+}
+
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u32 wptr_poll_cntl;
+ u64 wptr_gpu_addr;
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
+ ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
+ ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
+ doorbell_offset);
+
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v5_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v5_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v5_0_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v5_0_ctx_switch_enable(adev, false);
+ sdma_v5_0_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v5_0_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v5_0_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v5_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v5_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v5_0_rlc_resume(adev);
+
+ return r;
+}
+
+static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
+static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
+}
+
+/**
+ * sdma_v5_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ }
+
+ r = amdgpu_ring_alloc(ring, 20);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v5_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (NAVI10).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+ ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+ }
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v5_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v5_0_ring_pad_ib - pad the IB
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+
+/**
+ * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static int sdma_v5_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v5_0_set_ring_funcs(adev);
+ sdma_v5_0_set_buffer_funcs(adev);
+ sdma_v5_0_set_vm_pte_funcs(adev);
+ sdma_v5_0_set_irq_funcs(adev);
+ sdma_v5_0_set_mqd_funcs(adev);
+
+ return 0;
+}
+
+
+static int sdma_v5_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
+ SDMA0_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1,
+ SDMA1_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ r = sdma_v5_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index = (i == 0) ?
+ (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset
+ : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v5_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_destroy_inst_ctx(adev, false);
+
+ return 0;
+}
+
+static int sdma_v5_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v5_0_init_golden_registers(adev);
+
+ r = sdma_v5_0_start(adev);
+
+ return r;
+}
+
+static int sdma_v5_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ return 0;
+ }
+
+ sdma_v5_0_ctx_switch_enable(adev, false);
+ sdma_v5_0_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v5_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_0_hw_fini(adev);
+}
+
+static int sdma_v5_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_0_hw_init(adev);
+}
+
+static bool sdma_v5_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v5_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 sdma0, sdma1;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v5_0_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ if (index == 0)
+ sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+ else
+ sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
+ sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
+ sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
+
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
+
+ return 0;
+}
+
+static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t mes_queue_id = entry->src_data[0];
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA1:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ /* Enable sdma clock gating */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ } else {
+ /* Disable sdma clock gating */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ }
+ }
+}
+
+static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ /* Enable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ } else {
+ /* Disable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ }
+ }
+}
+
+static int sdma_v5_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ sdma_v5_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ sdma_v5_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int sdma_v5_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
+ .name = "sdma_v5_0",
+ .early_init = sdma_v5_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v5_0_sw_init,
+ .sw_fini = sdma_v5_0_sw_fini,
+ .hw_init = sdma_v5_0_hw_init,
+ .hw_fini = sdma_v5_0_hw_fini,
+ .suspend = sdma_v5_0_suspend,
+ .resume = sdma_v5_0_resume,
+ .is_idle = sdma_v5_0_is_idle,
+ .wait_for_idle = sdma_v5_0_wait_for_idle,
+ .soft_reset = sdma_v5_0_soft_reset,
+ .set_clockgating_state = sdma_v5_0_set_clockgating_state,
+ .set_powergating_state = sdma_v5_0_set_powergating_state,
+ .get_clockgating_state = sdma_v5_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v5_0_ring_get_rptr,
+ .get_wptr = sdma_v5_0_ring_get_wptr,
+ .set_wptr = sdma_v5_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v5_0_ring_init_cond_exec */
+ 6 + /* sdma_v5_0_ring_emit_hdp_flush */
+ 3 + /* hdp_invalidate */
+ 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
+ /* sdma_v5_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
+ 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
+ .emit_ib = sdma_v5_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v5_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v5_0_ring_test_ring,
+ .test_ib = sdma_v5_0_ring_test_ib,
+ .insert_nop = sdma_v5_0_ring_insert_nop,
+ .pad_ib = sdma_v5_0_ring_pad_ib,
+ .emit_wreg = sdma_v5_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
+ .preempt_ib = sdma_v5_0_ring_preempt_ib,
+};
+
+static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = {
+ .set = sdma_v5_0_set_trap_irq_state,
+ .process = sdma_v5_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {
+ .process = sdma_v5_0_process_illegal_inst_irq,
+};
+
+static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine (NAVI10).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (NAVI10).
+ */
+static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v5_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v5_0_emit_fill_buffer,
+};
+
+static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mman.buffer_funcs == NULL) {
+ adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+ }
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_0_vm_copy_pte,
+ .write_pte = sdma_v5_0_vm_write_pte,
+ .set_pte_pde = sdma_v5_0_vm_set_pte_pde,
+};
+
+static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ if (adev->vm_manager.vm_pte_funcs == NULL) {
+ adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+ }
+}
+
+const struct amdgpu_ip_block_version sdma_v5_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v5_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
new file mode 100644
index 000000000..d4e3c2e69
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V5_0_H__
+#define __SDMA_V5_0_H__
+
+extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
+
+#endif /* __SDMA_V5_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
new file mode 100644
index 000000000..0f930fd8a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -0,0 +1,1861 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
+#include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h"
+#include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "navi10_sdma_pkt_open.h"
+#include "nbio_v2_3.h"
+#include "sdma_common.h"
+#include "sdma_v5_2.h"
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
+
+MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA3_REG_OFFSET 0x400
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x5893
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+
+static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance != 0)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+ } else {
+ if (instance < 2) {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][2];
+ if (instance == 3)
+ internal_offset += SDMA3_REG_OFFSET;
+ }
+ }
+
+ return base + internal_offset;
+}
+
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
+ amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+
+ return ret;
+}
+
+static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur > offset)
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+/**
+ * sdma_v5_2_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v5_2_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
+ wptr = wptr << 32;
+ wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
+ DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v5_2_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (NAVI10+).
+ */
+static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
+ SDMA_GCR_GLM_INV | SDMA_GCR_GL1_INV |
+ SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+/**
+ * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if ((flags & AMDGPU_FENCE_FLAG_INT)) {
+ uint32_t ctx = ring->is_mes_queue ?
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ }
+}
+
+
+/**
+ * sdma_v5_2_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ }
+}
+
+/**
+ * sdma_v5_2_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch.
+ */
+static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ phase_quantum);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
+ }
+
+}
+
+/**
+ * sdma_v5_2_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_rlc_stop(adev);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
+ }
+}
+
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u32 wptr_poll_cntl;
+ u64 wptr_gpu_addr;
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_2_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v5_2_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1/2/3 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v5_2_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+static int sdma_v5_2_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 grbm_soft_reset;
+ u32 tmp;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= i;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_2_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v5_2_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v5_2_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ sdma_v5_2_soft_reset(adev);
+ /* unhalt the MEs */
+ sdma_v5_2_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v5_2_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v5_2_rlc_resume(adev);
+
+ return r;
+}
+
+static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+ mmSDMA0_GFX_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+ return 0;
+}
+
+static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
+}
+
+/**
+ * sdma_v5_2_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ }
+
+ r = amdgpu_ring_alloc(ring, 20);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v5_2_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+ ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+ }
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v5_2_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v5_2_ring_pad_ib - pad the IB
+ *
+ * @ib: indirect buffer to fill with padding
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+
+/**
+ * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static int sdma_v5_2_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v5_2_set_ring_funcs(adev);
+ sdma_v5_2_set_buffer_funcs(adev);
+ sdma_v5_2_set_vm_pte_funcs(adev);
+ sdma_v5_2_set_irq_funcs(adev);
+ sdma_v5_2_set_mqd_funcs(adev);
+
+ return 0;
+}
+
+static unsigned sdma_v5_2_seq_to_irq_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SOC15_IH_CLIENTID_SDMA0;
+ case 1:
+ return SOC15_IH_CLIENTID_SDMA1;
+ case 2:
+ return SOC15_IH_CLIENTID_SDMA2;
+ case 3:
+ return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
+{
+ switch (seq_num) {
+ case 0:
+ return SDMA0_5_0__SRCID__SDMA_TRAP;
+ case 1:
+ return SDMA1_5_0__SRCID__SDMA_TRAP;
+ case 2:
+ return SDMA2_5_0__SRCID__SDMA_TRAP;
+ case 3:
+ return SDMA3_5_0__SRCID__SDMA_TRAP;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+static int sdma_v5_2_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i),
+ sdma_v5_2_seq_to_trap_id(i),
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+
+ DRM_INFO("use_doorbell being set to: [%s]\n",
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v5_2_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ return 0;
+}
+
+static int sdma_v5_2_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_2_start(adev);
+}
+
+static int sdma_v5_2_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ return 0;
+ }
+
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v5_2_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_2_hw_fini(adev);
+}
+
+static int sdma_v5_2_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_2_hw_init(adev);
+}
+
+static bool sdma_v5_2_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v5_2_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 sdma0, sdma1, sdma2, sdma3;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
+ sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG));
+ sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+ u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
+
+ return 0;
+}
+
+static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t mes_queue_id = entry->src_data[0];
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA1:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA2:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[2].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[3].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
+ int i)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(5, 2, 1):
+ if (adev->sdma.instance[i].fw_version < 70)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 3):
+ if (adev->sdma.instance[i].fw_version < 47)
+ return false;
+ break;
+ case IP_VERSION(5, 2, 7):
+ if (adev->sdma.instance[i].fw_version < 9)
+ return false;
+ break;
+ default:
+ return true;
+ }
+
+ return true;
+
+}
+
+static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+
+ if (!sdma_v5_2_firmware_mgcg_support(adev, i))
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ /* Enable sdma clock gating */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ } else {
+ /* Disable sdma clock gating */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ }
+ }
+}
+
+static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+
+ if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ /* Enable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ } else {
+ /* Disable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ }
+ }
+}
+
+static int sdma_v5_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 7):
+ sdma_v5_2_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ sdma_v5_2_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int sdma_v5_2_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
+ .name = "sdma_v5_2",
+ .early_init = sdma_v5_2_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v5_2_sw_init,
+ .sw_fini = sdma_v5_2_sw_fini,
+ .hw_init = sdma_v5_2_hw_init,
+ .hw_fini = sdma_v5_2_hw_fini,
+ .suspend = sdma_v5_2_suspend,
+ .resume = sdma_v5_2_resume,
+ .is_idle = sdma_v5_2_is_idle,
+ .wait_for_idle = sdma_v5_2_wait_for_idle,
+ .soft_reset = sdma_v5_2_soft_reset,
+ .set_clockgating_state = sdma_v5_2_set_clockgating_state,
+ .set_powergating_state = sdma_v5_2_set_powergating_state,
+ .get_clockgating_state = sdma_v5_2_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v5_2_ring_get_rptr,
+ .get_wptr = sdma_v5_2_ring_get_wptr,
+ .set_wptr = sdma_v5_2_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v5_2_ring_init_cond_exec */
+ 6 + /* sdma_v5_2_ring_emit_hdp_flush */
+ 3 + /* hdp_invalidate */
+ 6 + /* sdma_v5_2_ring_emit_pipeline_sync */
+ /* sdma_v5_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
+ .emit_ib = sdma_v5_2_ring_emit_ib,
+ .emit_mem_sync = sdma_v5_2_ring_emit_mem_sync,
+ .emit_fence = sdma_v5_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v5_2_ring_test_ring,
+ .test_ib = sdma_v5_2_ring_test_ib,
+ .insert_nop = sdma_v5_2_ring_insert_nop,
+ .pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
+ .emit_wreg = sdma_v5_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v5_2_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
+ .preempt_ib = sdma_v5_2_ring_preempt_ib,
+};
+
+static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
+ .set = sdma_v5_2_set_trap_irq_state,
+ .process = sdma_v5_2_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
+ .process = sdma_v5_2_process_illegal_inst_irq,
+};
+
+static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v5_2_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
+};
+
+static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mman.buffer_funcs == NULL) {
+ adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+ }
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_2_vm_copy_pte,
+ .write_pte = sdma_v5_2_vm_write_pte,
+ .set_pte_pde = sdma_v5_2_vm_set_pte_pde,
+};
+
+static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ if (adev->vm_manager.vm_pte_funcs == NULL) {
+ adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+ }
+}
+
+const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 5,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &sdma_v5_2_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
new file mode 100644
index 000000000..b70414fef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V5_2_H__
+#define __SDMA_V5_2_H__
+
+extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
+
+#endif /* __SDMA_V5_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
new file mode 100644
index 000000000..45be0af25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -0,0 +1,1711 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v6_0.h"
+#include "v11_structs.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v6_0_start(struct amdgpu_device *adev);
+
+static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance != 0)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ }
+
+ return base + internal_offset;
+}
+
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
+ amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+
+ return ret;
+}
+
+static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur > offset)
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+/**
+ * sdma_v6_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware.
+ */
+static uint64_t sdma_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v6_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware.
+ */
+static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v6_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware.
+ */
+static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
+
+ DRM_DEBUG("Setting write pointer\n");
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ *wptr_saved = ring->wptr << 2;
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index,
+ ring->wptr << 2);
+ }
+ } else {
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+ }
+}
+
+static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: IB object to schedule
+ * @flags: unused
+ * @job: job to retrieve vmid from
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v6_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v6_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ uint32_t ctx = ring->is_mes_queue ?
+ (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ }
+}
+
+/**
+ * sdma_v6_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ }
+}
+
+/**
+ * sdma_v6_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable context switching due to queue empty conditions
+ *
+ * Enable or disable the async dma engines queue empty context switch.
+ */
+static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ CTXEMPTY_INT_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL), f32_cntl);
+ }
+ }
+}
+
+/**
+ * sdma_v6_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v6_0_gfx_stop(adev);
+ sdma_v6_0_rlc_stop(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), f32_cntl);
+ }
+}
+
+/**
+ * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_enable(adev, true);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v6_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v6_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+ bool use_broadcast;
+
+ /* halt the MEs */
+ sdma_v6_0_enable(adev, false);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ /* use broadcast mode to load SDMA microcode by default */
+ use_broadcast = true;
+
+ if (use_broadcast) {
+ dev_info(adev->dev, "Use broadcast method to load SDMA firmware\n");
+ /* load Control Thread microcode */
+ hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ /* load Context Switch microcode */
+ fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ctl_ucode_offset));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0x8000);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+ } else {
+ dev_info(adev->dev, "Use legacy method to load SDMA firmware\n");
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* load Control Thread microcode */
+ hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+
+ /* load Context Switch microcode */
+ fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ctl_ucode_offset));
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0x8000);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp;
+ int i;
+
+ sdma_v6_0_gfx_stop(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v6_0_start(adev);
+}
+
+static bool sdma_v6_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * sdma_v6_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v6_0_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v6_0_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v6_0_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* The value of regSDMA_F32_CNTL is invalid the moment after loading fw */
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v6_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v6_0_ctxempty_int_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v6_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v6_0_rlc_resume(adev);
+
+ return r;
+}
+
+static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, 0,
+ regSDMA0_QUEUE0_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+ m->sdmax_rlcx_skip_cntl = 0;
+ m->sdmax_rlcx_context_status = 0;
+ m->sdmax_rlcx_doorbell_log = 0;
+
+ m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
+ m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+
+ return 0;
+}
+
+static void sdma_v6_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v11_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v6_0_mqd_init;
+}
+
+/**
+ * sdma_v6_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ }
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v6_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ volatile uint32_t *cpu_ptr = NULL;
+
+ tmp = 0xCAFEDEAD;
+ memset(&ib, 0, sizeof(ib));
+
+ if (ring->is_mes_queue) {
+ uint32_t offset = 0;
+ offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+ ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+ offset = amdgpu_mes_ctx_get_offs(ring,
+ AMDGPU_MES_CTX_PADDING_OFFS);
+ gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+ cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+ *cpu_ptr = tmp;
+ } else {
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+ }
+
+ ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ if (ring->is_mes_queue)
+ tmp = le32_to_cpu(*cpu_ptr);
+ else
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v6_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v6_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v6_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v6_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v6_0_ring_pad_ib - pad the IB
+ * @ib: indirect buffer to fill with padding
+ * @ring: amdgpu ring pointer
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v6_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v6_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
+}
+
+static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v6_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v6_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static struct amdgpu_sdma_ras sdma_v6_0_3_ras = {
+ .ras_block = {
+ .ras_late_init = amdgpu_ras_block_late_init,
+ },
+};
+
+static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ case IP_VERSION(6, 0, 3):
+ adev->sdma.ras = &sdma_v6_0_3_ras;
+ break;
+ default:
+ break;
+ }
+
+}
+
+static int sdma_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v6_0_set_ring_funcs(adev);
+ sdma_v6_0_set_buffer_funcs(adev);
+ sdma_v6_0_set_vm_pte_funcs(adev);
+ sdma_v6_0_set_irq_funcs(adev);
+ sdma_v6_0_set_mqd_funcs(adev);
+ sdma_v6_0_set_ras_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v6_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_sdma_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int sdma_v6_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+ return 0;
+}
+
+static int sdma_v6_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v6_0_start(adev);
+}
+
+static int sdma_v6_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* disable the scheduler for SDMA */
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ return 0;
+ }
+
+ sdma_v6_0_ctxempty_int_enable(adev, false);
+ sdma_v6_0_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v6_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v6_0_hw_fini(adev);
+}
+
+static int sdma_v6_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v6_0_hw_init(adev);
+}
+
+static bool sdma_v6_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v6_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 sdma0, sdma1;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v6_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v6_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v6_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ u32 reg_offset = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int instances, queue;
+ uint32_t mes_queue_id = entry->src_data[0];
+
+ DRM_DEBUG("IH: SDMA trap\n");
+
+ if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+ struct amdgpu_mes_queue *queue;
+
+ mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+ spin_lock(&adev->mes.queue_id_lock);
+ queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+ if (queue) {
+ DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+ amdgpu_fence_process(queue->ring);
+ }
+ spin_unlock(&adev->mes.queue_id_lock);
+ return 0;
+ }
+
+ queue = entry->ring_id & 0xf;
+ instances = (entry->ring_id & 0xf0) >> 4;
+ if (instances > 1) {
+ DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+ return -EINVAL;
+ }
+
+ switch (entry->client_id) {
+ case SOC21_IH_CLIENTID_GFX:
+ switch (queue) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static int sdma_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int sdma_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
+{
+}
+
+const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
+ .name = "sdma_v6_0",
+ .early_init = sdma_v6_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v6_0_sw_init,
+ .sw_fini = sdma_v6_0_sw_fini,
+ .hw_init = sdma_v6_0_hw_init,
+ .hw_fini = sdma_v6_0_hw_fini,
+ .suspend = sdma_v6_0_suspend,
+ .resume = sdma_v6_0_resume,
+ .is_idle = sdma_v6_0_is_idle,
+ .wait_for_idle = sdma_v6_0_wait_for_idle,
+ .soft_reset = sdma_v6_0_soft_reset,
+ .check_soft_reset = sdma_v6_0_check_soft_reset,
+ .set_clockgating_state = sdma_v6_0_set_clockgating_state,
+ .set_powergating_state = sdma_v6_0_set_powergating_state,
+ .get_clockgating_state = sdma_v6_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v6_0_ring_get_rptr,
+ .get_wptr = sdma_v6_0_ring_get_wptr,
+ .set_wptr = sdma_v6_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v6_0_ring_init_cond_exec */
+ 6 + /* sdma_v6_0_ring_emit_hdp_flush */
+ 6 + /* sdma_v6_0_ring_emit_pipeline_sync */
+ /* sdma_v6_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v6_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v6_0_ring_emit_ib */
+ .emit_ib = sdma_v6_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v6_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v6_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v6_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v6_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v6_0_ring_test_ring,
+ .test_ib = sdma_v6_0_ring_test_ib,
+ .insert_nop = sdma_v6_0_ring_insert_nop,
+ .pad_ib = sdma_v6_0_ring_pad_ib,
+ .emit_wreg = sdma_v6_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v6_0_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
+ .preempt_ib = sdma_v6_0_ring_preempt_ib,
+};
+
+static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v6_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
+ .set = sdma_v6_0_set_trap_irq_state,
+ .process = sdma_v6_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
+ .process = sdma_v6_0_process_illegal_inst_irq,
+};
+
+static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
+};
+
+static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &sdma_v6_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v6_0_vm_copy_pte,
+ .write_pte = sdma_v6_0_vm_write_pte,
+ .set_pte_pde = sdma_v6_0_vm_set_pte_pde,
+};
+
+static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
new file mode 100644
index 000000000..e473ec7df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V6_0_H__
+#define __SDMA_V6_0_H__
+
+extern const struct amd_ip_funcs sdma_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v6_0_ip_block;
+
+#endif /* __SDMA_V6_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
new file mode 100644
index 000000000..6af23e788
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
@@ -0,0 +1,5664 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V6_0_0_PKT_OPEN_H_
+#define __SDMA_V6_0_0_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_LARGE 36
+#define SDMA_SUBOP_COPY_LINEAR_BC 16
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_MEM_INCR 1
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for backwards field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift)
+
+/*define for all field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for addr_pair_num field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_ADDR_PAIR_NUM(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst2_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for dst1_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift)
+
+/*define for DW_4 word*/
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift)
+
+/*define for DW_5 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift)
+
+/*define for DW_6 word*/
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift)
+
+/*define for DW_7 word*/
+/*define for src_slice_pitch_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_SRC_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift)
+
+/*define for DW_8 word*/
+/*define for src_slice_pitch_47_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_SRC_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_11 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift)
+
+/*define for DW_12 word*/
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift)
+
+/*define for DW_13 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_offset 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift)
+
+/*define for DW_14 word*/
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_offset 14
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift)
+
+/*define for DW_15 word*/
+/*define for dst_slice_pitch_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_offset 15
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_DST_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift)
+
+/*define for DW_16 word*/
+/*define for dst_slice_pitch_47_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift)
+
+/*define for dst_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift)
+
+/*define for src_policy field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift)
+
+/*define for DW_17 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_offset 17
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift)
+
+/*define for DW_18 word*/
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_offset 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift)
+
+/*define for DW_19 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_offset 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_T2T_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_cpv_mask) << SDMA_PKT_COPY_T2T_HEADER_cpv_shift)
+
+/*define for dcc_dir field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_T2T_DW_14_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_T2T_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for tile_cache_policy field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_STRUCT_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask) << SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for linear_cache_policy field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for struct_cache_policy field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift 26
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_UNTILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift 26
+#define SDMA_PKT_WRITE_UNTILED_DW_3_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_TILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_7_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask) << SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18
+#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_COPY_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_cpv_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift 22
+#define SDMA_PKT_PTEPDE_COPY_COUNT_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift 29
+#define SDMA_PKT_PTEPDE_COPY_COUNT_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word*/
+/*define for mask_first_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift 26
+#define SDMA_PKT_PTEPDE_RMW_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_RMW_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word*/
+/*define for mask_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word*/
+/*define for mask_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word*/
+/*define for value_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word*/
+/*define for value_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+/*define for COUNT word*/
+/*define for num_of_pte field*/
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_offset 7
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift 0
+#define SDMA_PKT_PTEPDE_RMW_COUNT_NUM_OF_PTE(x) (((x) & SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask) << SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift)
+
+
+/*
+** Definitions for SDMA_PKT_REGISTER_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_shift 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_REGISTER_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_shift 0
+#define SDMA_PKT_REGISTER_RMW_ADDR_ADDR(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_addr_mask) << SDMA_PKT_REGISTER_RMW_ADDR_addr_shift)
+
+/*define for aperture_id field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift 20
+#define SDMA_PKT_REGISTER_RMW_ADDR_APERTURE_ID(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask) << SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_offset 2
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_shift 0
+#define SDMA_PKT_REGISTER_RMW_MASK_MASK(x) (((x) & SDMA_PKT_REGISTER_RMW_MASK_mask_mask) << SDMA_PKT_REGISTER_RMW_MASK_mask_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_offset 3
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_shift 0
+#define SDMA_PKT_REGISTER_RMW_VALUE_VALUE(x) (((x) & SDMA_PKT_REGISTER_RMW_VALUE_value_mask) << SDMA_PKT_REGISTER_RMW_VALUE_value_shift)
+
+/*define for MISC word*/
+/*define for stride field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_shift 0
+#define SDMA_PKT_REGISTER_RMW_MISC_STRIDE(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_stride_mask) << SDMA_PKT_REGISTER_RMW_MISC_stride_shift)
+
+/*define for num_of_reg field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift 20
+#define SDMA_PKT_REGISTER_RMW_MISC_NUM_OF_REG(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask) << SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift 24
+#define SDMA_PKT_WRITE_INCR_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask) << SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cpv_mask) << SDMA_PKT_WRITE_INCR_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for priv field*/
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_MEM_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_op_shift 0
+#define SDMA_PKT_MEM_INCR_HEADER_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_op_mask) << SDMA_PKT_MEM_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_MEM_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_sub_op_mask) << SDMA_PKT_MEM_INCR_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift 24
+#define SDMA_PKT_MEM_INCR_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift 26
+#define SDMA_PKT_MEM_INCR_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_MEM_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_cpv_mask) << SDMA_PKT_MEM_INCR_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask) << SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask) << SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_FENCE_HEADER_llc_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_llc_policy_shift 26
+#define SDMA_PKT_FENCE_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_llc_policy_mask) << SDMA_PKT_FENCE_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_FENCE_HEADER_cpv_offset 0
+#define SDMA_PKT_FENCE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_cpv_shift 28
+#define SDMA_PKT_FENCE_HEADER_CPV(x) (((x) & SDMA_PKT_FENCE_HEADER_cpv_mask) << SDMA_PKT_FENCE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for apertureid field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_shift 24
+#define SDMA_PKT_COND_EXE_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cache_policy_mask) << SDMA_PKT_COND_EXE_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COND_EXE_HEADER_cpv_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COND_EXE_HEADER_cpv_shift 28
+#define SDMA_PKT_COND_EXE_HEADER_CPV(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cpv_mask) << SDMA_PKT_COND_EXE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift 24
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift 28
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CPV(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift 24
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift 28
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CPV(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift)
+
+/*define for memlog_clr field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/*define for BYTE_STRIDE word*/
+/*define for byte_stride field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/*define for DMA_COUNT word*/
+/*define for dma_count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for BYTE_COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift 20
+#define SDMA_PKT_POLL_REGMEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift 24
+#define SDMA_PKT_POLL_REGMEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift)
+
+/*define for SRC_ADDR word*/
+/*define for addr_31_2 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for ea field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/*define for START_PAGE word*/
+/*define for addr_31_4 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/*define for PAGE_NUM word*/
+/*define for page_num_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift)
+
+/*define for mode field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/*define for PATTERN word*/
+/*define for pattern field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/*define for CMP0_ADDR_START_LO word*/
+/*define for cmp0_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/*define for CMP0_ADDR_START_HI word*/
+/*define for cmp0_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/*define for CMP0_ADDR_END_LO word*/
+/*define for cmp0_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP0_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift)
+
+/*define for CMP0_ADDR_END_HI word*/
+/*define for cmp0_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP0_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift)
+
+/*define for CMP1_ADDR_START_LO word*/
+/*define for cmp1_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/*define for CMP1_ADDR_START_HI word*/
+/*define for cmp1_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/*define for CMP1_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP1_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for REC_ADDR_LO word*/
+/*define for rec_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/*define for REC_ADDR_HI word*/
+/*define for rec_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/*define for RESERVED word*/
+/*define for reserved field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_shift 20
+#define SDMA_PKT_ATOMIC_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cache_policy_mask) << SDMA_PKT_ATOMIC_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_ATOMIC_HEADER_cpv_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_cpv_shift 24
+#define SDMA_PKT_ATOMIC_HEADER_CPV(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cpv_mask) << SDMA_PKT_ATOMIC_HEADER_cpv_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for per_vmid_inv_req field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/*define for flush_type field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/*define for l2_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/*define for l2_pde0 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/*define for l2_pde1 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/*define for l2_pde2 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/*define for l1_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/*define for clr_protection_fault_status_addr field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/*define for log_request field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/*define for four_kilobytes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for s field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/*define for page_va_42_12 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for page_va_47_43 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for base_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for base_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift)
+
+/*define for gcr_control_15_0 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for gcr_control_18_16 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift)
+
+/*define for limit_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift)
+
+/*define for PAYLOAD4 word*/
+/*define for limit_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_HEADER_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_cpv_mask) << SDMA_AQL_PKT_HEADER_HEADER_cpv_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for cpv field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for CACHE_POLICY word*/
+/*define for cache_policy0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift)
+
+/*define for cache_policy1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift 5
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift)
+
+/*define for cache_policy2 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift 10
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY2(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift)
+
+/*define for cache_policy3 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift 15
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY3(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift)
+
+/*define for cache_policy4 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY4(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __SDMA_V6_0_0_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
new file mode 100644
index 000000000..4b81f29e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -0,0 +1,2789 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "atom.h"
+#include "amd_pcie.h"
+#include "si_dpm.h"
+#include "sid.h"
+#include "si_ih.h"
+#include "gfx_v6_0.h"
+#include "gmc_v6_0.h"
+#include "si_dma.h"
+#include "dce_v6_0.h"
+#include "si.h"
+#include "uvd_v3_1.h"
+#include "amdgpu_vkms.h"
+#include "gca/gfx_6_0_d.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+#include "gmc/gmc_6_0_d.h"
+#include "dce/dce_6_0_d.h"
+#include "uvd/uvd_4_0_d.h"
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+
+#include "amdgpu_dm.h"
+
+static const u32 tahiti_golden_registers[] =
+{
+ mmAZALIA_SCLK_CONTROL, 0x00000030, 0x00000011,
+ mmCB_HW_CONTROL, 0x00010000, 0x00018208,
+ mmDB_DEBUG, 0xffffffff, 0x00000000,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0x0002021c, 0x00020200,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ 0x340c, 0x000000c0, 0x00800040,
+ 0x360c, 0x000000c0, 0x00800040,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x00200000, 0x50100000,
+ mmDIG0_HDMI_CONTROL, 0x31000311, 0x00000011,
+ mmMC_ARB_WTM_CNTL_RD, 0x00000003, 0x000007ff,
+ mmMC_XPB_P2P_BAR_CFG, 0x000007ff, 0x00000000,
+ mmPA_CL_ENHANCE, 0xf000001f, 0x00000007,
+ mmPA_SC_FORCE_EOV_MAX_CNTS, 0xffffffff, 0x00ffffff,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_MODE_CNTL_1, 0x07ffffff, 0x4e000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3f3fff, 0x2a00126a,
+ 0x000c, 0xffffffff, 0x0040,
+ 0x000d, 0x00000040, 0x00004040,
+ mmSPI_CONFIG_CNTL, 0x07ffffff, 0x03000000,
+ mmSQ_DED_CNT, 0x01ff1f3f, 0x00000000,
+ mmSQ_SEC_CNT, 0x01ff1f3f, 0x00000000,
+ mmSX_DEBUG_1, 0x0000007f, 0x00000020,
+ mmTA_CNTL_AUX, 0x00010000, 0x00010000,
+ mmTCP_ADDR_CONFIG, 0x00000200, 0x000002fb,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
+ mmVGT_FIFO_DEPTHS, 0xffffffff, 0x000fff40,
+ mmVGT_GS_VERTEX_REUSE, 0x0000001f, 0x00000010,
+ mmVM_CONTEXT0_CNTL, 0x20000000, 0x20fffed8,
+ mmVM_L2_CG, 0x000c0fc0, 0x000c0400,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0xffffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 tahiti_golden_registers2[] =
+{
+ mmMCIF_MEM_CONTROL, 0x00000001, 0x00000001,
+};
+
+static const u32 tahiti_golden_rlc_registers[] =
+{
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
+ mmRLC_LB_PARAMS, 0xffffffff, 0x00601005,
+ 0x311f, 0xffffffff, 0x10104040,
+ 0x3122, 0xffffffff, 0x0100000a,
+ mmRLC_LB_CNTR_MAX, 0xffffffff, 0x00000800,
+ mmRLC_LB_CNTL, 0xffffffff, 0x800000f4,
+ mmUVD_CGC_GATE, 0x00000008, 0x00000000,
+};
+
+static const u32 pitcairn_golden_registers[] =
+{
+ mmAZALIA_SCLK_CONTROL, 0x00000030, 0x00000011,
+ mmCB_HW_CONTROL, 0x00010000, 0x00018208,
+ mmDB_DEBUG, 0xffffffff, 0x00000000,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0x0002021c, 0x00020200,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ 0x340c, 0x000300c0, 0x00800040,
+ 0x360c, 0x000300c0, 0x00800040,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x00200000, 0x50100000,
+ mmDIG0_HDMI_CONTROL, 0x31000311, 0x00000011,
+ mmMC_SEQ_PMG_PG_HWCNTL, 0x00073ffe, 0x000022a2,
+ mmMC_XPB_P2P_BAR_CFG, 0x000007ff, 0x00000000,
+ mmPA_CL_ENHANCE, 0xf000001f, 0x00000007,
+ mmPA_SC_FORCE_EOV_MAX_CNTS, 0xffffffff, 0x00ffffff,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_MODE_CNTL_1, 0x07ffffff, 0x4e000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3f3fff, 0x2a00126a,
+ 0x000c, 0xffffffff, 0x0040,
+ 0x000d, 0x00000040, 0x00004040,
+ mmSPI_CONFIG_CNTL, 0x07ffffff, 0x03000000,
+ mmSX_DEBUG_1, 0x0000007f, 0x00000020,
+ mmTA_CNTL_AUX, 0x00010000, 0x00010000,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+ mmVGT_GS_VERTEX_REUSE, 0x0000001f, 0x00000010,
+ mmVM_L2_CG, 0x000c0fc0, 0x000c0400,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0xffffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 pitcairn_golden_rlc_registers[] =
+{
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
+ mmRLC_LB_PARAMS, 0xffffffff, 0x00601004,
+ 0x311f, 0xffffffff, 0x10102020,
+ 0x3122, 0xffffffff, 0x01000020,
+ mmRLC_LB_CNTR_MAX, 0xffffffff, 0x00000800,
+ mmRLC_LB_CNTL, 0xffffffff, 0x800000a4,
+};
+
+static const u32 verde_pg_init[] =
+{
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x40000,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x200010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x7007,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x300010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x400000,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x100010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x120200,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x500010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x1e1e16,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x600010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x171f1e,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x700010ff,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_WRITE, 0xffffffff, 0x0,
+ mmGMCON_PGFSM_CONFIG, 0xffffffff, 0x9ff,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x0,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x10000800,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xf,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xf,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x4,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1000051e,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xffff,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xffff,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x8,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x80500,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x12,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x9050c,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x1d,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xb052c,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x2a,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1053e,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x2d,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x10546,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x30,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xa054e,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x3c,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1055f,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x3f,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x10567,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x42,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1056f,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x45,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x10572,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x48,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x20575,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x4c,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x190801,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x67,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1082a,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x6a,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1b082d,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x87,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x310851,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xba,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x891,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xbc,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x893,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xbe,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x20895,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xc2,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x20899,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xc6,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x2089d,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xca,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x8a1,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xcc,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x8a3,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xce,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x308a5,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0xd3,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x6d08cd,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x142,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x2000095a,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x1,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x144,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x301f095b,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x165,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xc094d,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x173,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xf096d,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x184,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x15097f,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x19b,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xc0998,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x1a9,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x409a7,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x1af,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0xcdc,
+ mmGMCON_RENG_RAM_INDEX, 0xffffffff, 0x1b1,
+ mmGMCON_RENG_RAM_DATA, 0xffffffff, 0x800,
+ mmGMCON_RENG_EXECUTE, 0xffffffff, 0x6c9b2000,
+ mmGMCON_MISC2, 0xfc00, 0x2000,
+ mmGMCON_MISC3, 0xffffffff, 0xfc0,
+ mmMC_PMG_AUTO_CFG, 0x00000100, 0x100,
+};
+
+static const u32 verde_golden_rlc_registers[] =
+{
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x02010002,
+ mmRLC_LB_PARAMS, 0xffffffff, 0x033f1005,
+ 0x311f, 0xffffffff, 0x10808020,
+ 0x3122, 0xffffffff, 0x00800008,
+ mmRLC_LB_CNTR_MAX, 0xffffffff, 0x00001000,
+ mmRLC_LB_CNTL, 0xffffffff, 0x80010014,
+};
+
+static const u32 verde_golden_registers[] =
+{
+ mmAZALIA_SCLK_CONTROL, 0x00000030, 0x00000011,
+ mmCB_HW_CONTROL, 0x00010000, 0x00018208,
+ mmDB_DEBUG, 0xffffffff, 0x00000000,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0x0002021c, 0x00020200,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ 0x340c, 0x000300c0, 0x00800040,
+ 0x360c, 0x000300c0, 0x00800040,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x00200000, 0x50100000,
+ mmDIG0_HDMI_CONTROL, 0x31000311, 0x00000011,
+ mmMC_SEQ_PMG_PG_HWCNTL, 0x00073ffe, 0x000022a2,
+ mmMC_XPB_P2P_BAR_CFG, 0x000007ff, 0x00000000,
+ mmPA_CL_ENHANCE, 0xf000001f, 0x00000007,
+ mmPA_SC_FORCE_EOV_MAX_CNTS, 0xffffffff, 0x00ffffff,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_MODE_CNTL_1, 0x07ffffff, 0x4e000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3f3fff, 0x0000124a,
+ 0x000c, 0xffffffff, 0x0040,
+ 0x000d, 0x00000040, 0x00004040,
+ mmSPI_CONFIG_CNTL, 0x07ffffff, 0x03000000,
+ mmSQ_DED_CNT, 0x01ff1f3f, 0x00000000,
+ mmSQ_SEC_CNT, 0x01ff1f3f, 0x00000000,
+ mmSX_DEBUG_1, 0x0000007f, 0x00000020,
+ mmTA_CNTL_AUX, 0x00010000, 0x00010000,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x00000003,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001032,
+ mmVGT_GS_VERTEX_REUSE, 0x0000001f, 0x00000010,
+ mmVM_L2_CG, 0x000c0fc0, 0x000c0400,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0xffffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 oland_golden_registers[] =
+{
+ mmAZALIA_SCLK_CONTROL, 0x00000030, 0x00000011,
+ mmCB_HW_CONTROL, 0x00010000, 0x00018208,
+ mmDB_DEBUG, 0xffffffff, 0x00000000,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0x0002021c, 0x00020200,
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ 0x340c, 0x000300c0, 0x00800040,
+ 0x360c, 0x000300c0, 0x00800040,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x00200000, 0x50100000,
+ mmDIG0_HDMI_CONTROL, 0x31000311, 0x00000011,
+ mmMC_SEQ_PMG_PG_HWCNTL, 0x00073ffe, 0x000022a2,
+ mmMC_XPB_P2P_BAR_CFG, 0x000007ff, 0x00000000,
+ mmPA_CL_ENHANCE, 0xf000001f, 0x00000007,
+ mmPA_SC_FORCE_EOV_MAX_CNTS, 0xffffffff, 0x00ffffff,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_MODE_CNTL_1, 0x07ffffff, 0x4e000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3f3fff, 0x00000082,
+ 0x000c, 0xffffffff, 0x0040,
+ 0x000d, 0x00000040, 0x00004040,
+ mmSPI_CONFIG_CNTL, 0x07ffffff, 0x03000000,
+ mmSX_DEBUG_1, 0x0000007f, 0x00000020,
+ mmTA_CNTL_AUX, 0x00010000, 0x00010000,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+ mmVGT_GS_VERTEX_REUSE, 0x0000001f, 0x00000010,
+ mmVM_L2_CG, 0x000c0fc0, 0x000c0400,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0xffffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+
+};
+
+static const u32 oland_golden_rlc_registers[] =
+{
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x02010002,
+ mmRLC_LB_PARAMS, 0xffffffff, 0x00601005,
+ 0x311f, 0xffffffff, 0x10104040,
+ 0x3122, 0xffffffff, 0x0100000a,
+ mmRLC_LB_CNTR_MAX, 0xffffffff, 0x00000800,
+ mmRLC_LB_CNTL, 0xffffffff, 0x800000f4,
+};
+
+static const u32 hainan_golden_registers[] =
+{
+ 0x17bc, 0x00000030, 0x00000011,
+ mmCB_HW_CONTROL, 0x00010000, 0x00018208,
+ mmDB_DEBUG, 0xffffffff, 0x00000000,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmDB_DEBUG3, 0x0002021c, 0x00020200,
+ 0x031e, 0x00000080, 0x00000000,
+ 0x3430, 0xff000fff, 0x00000100,
+ 0x340c, 0x000300c0, 0x00800040,
+ 0x3630, 0xff000fff, 0x00000100,
+ 0x360c, 0x000300c0, 0x00800040,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0x00200000, 0x50100000,
+ 0x1c0c, 0x31000311, 0x00000011,
+ mmMC_SEQ_PMG_PG_HWCNTL, 0x00073ffe, 0x000022a2,
+ mmMC_XPB_P2P_BAR_CFG, 0x000007ff, 0x00000000,
+ mmPA_CL_ENHANCE, 0xf000001f, 0x00000007,
+ mmPA_SC_FORCE_EOV_MAX_CNTS, 0xffffffff, 0x00ffffff,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_MODE_CNTL_1, 0x07ffffff, 0x4e000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3f3fff, 0x00000000,
+ 0x000c, 0xffffffff, 0x0040,
+ 0x000d, 0x00000040, 0x00004040,
+ mmSPI_CONFIG_CNTL, 0x03e00000, 0x03600000,
+ mmSX_DEBUG_1, 0x0000007f, 0x00000020,
+ mmTA_CNTL_AUX, 0x00010000, 0x00010000,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+ mmVGT_GS_VERTEX_REUSE, 0x0000001f, 0x00000010,
+ mmVM_L2_CG, 0x000c0fc0, 0x000c0400,
+ mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0xffffffff,
+ mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+ mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff,
+};
+
+static const u32 hainan_golden_registers2[] =
+{
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x2011003,
+};
+
+static const u32 tahiti_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xfffffffc,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ 0x2458, 0xffffffff, 0x00010000,
+ 0x2459, 0xffffffff, 0x00030002,
+ 0x245a, 0xffffffff, 0x00040007,
+ 0x245b, 0xffffffff, 0x00060005,
+ 0x245c, 0xffffffff, 0x00090008,
+ 0x245d, 0xffffffff, 0x00020001,
+ 0x245e, 0xffffffff, 0x00040003,
+ 0x245f, 0xffffffff, 0x00000007,
+ 0x2460, 0xffffffff, 0x00060005,
+ 0x2461, 0xffffffff, 0x00090008,
+ 0x2462, 0xffffffff, 0x00030002,
+ 0x2463, 0xffffffff, 0x00050004,
+ 0x2464, 0xffffffff, 0x00000008,
+ 0x2465, 0xffffffff, 0x00070006,
+ 0x2466, 0xffffffff, 0x000a0009,
+ 0x2467, 0xffffffff, 0x00040003,
+ 0x2468, 0xffffffff, 0x00060005,
+ 0x2469, 0xffffffff, 0x00000009,
+ 0x246a, 0xffffffff, 0x00080007,
+ 0x246b, 0xffffffff, 0x000b000a,
+ 0x246c, 0xffffffff, 0x00050004,
+ 0x246d, 0xffffffff, 0x00070006,
+ 0x246e, 0xffffffff, 0x0008000b,
+ 0x246f, 0xffffffff, 0x000a0009,
+ 0x2470, 0xffffffff, 0x000d000c,
+ 0x2471, 0xffffffff, 0x00060005,
+ 0x2472, 0xffffffff, 0x00080007,
+ 0x2473, 0xffffffff, 0x0000000b,
+ 0x2474, 0xffffffff, 0x000a0009,
+ 0x2475, 0xffffffff, 0x000d000c,
+ 0x2476, 0xffffffff, 0x00070006,
+ 0x2477, 0xffffffff, 0x00090008,
+ 0x2478, 0xffffffff, 0x0000000c,
+ 0x2479, 0xffffffff, 0x000b000a,
+ 0x247a, 0xffffffff, 0x000e000d,
+ 0x247b, 0xffffffff, 0x00080007,
+ 0x247c, 0xffffffff, 0x000a0009,
+ 0x247d, 0xffffffff, 0x0000000d,
+ 0x247e, 0xffffffff, 0x000c000b,
+ 0x247f, 0xffffffff, 0x000f000e,
+ 0x2480, 0xffffffff, 0x00090008,
+ 0x2481, 0xffffffff, 0x000b000a,
+ 0x2482, 0xffffffff, 0x000c000f,
+ 0x2483, 0xffffffff, 0x000e000d,
+ 0x2484, 0xffffffff, 0x00110010,
+ 0x2485, 0xffffffff, 0x000a0009,
+ 0x2486, 0xffffffff, 0x000c000b,
+ 0x2487, 0xffffffff, 0x0000000f,
+ 0x2488, 0xffffffff, 0x000e000d,
+ 0x2489, 0xffffffff, 0x00110010,
+ 0x248a, 0xffffffff, 0x000b000a,
+ 0x248b, 0xffffffff, 0x000d000c,
+ 0x248c, 0xffffffff, 0x00000010,
+ 0x248d, 0xffffffff, 0x000f000e,
+ 0x248e, 0xffffffff, 0x00120011,
+ 0x248f, 0xffffffff, 0x000c000b,
+ 0x2490, 0xffffffff, 0x000e000d,
+ 0x2491, 0xffffffff, 0x00000011,
+ 0x2492, 0xffffffff, 0x0010000f,
+ 0x2493, 0xffffffff, 0x00130012,
+ 0x2494, 0xffffffff, 0x000d000c,
+ 0x2495, 0xffffffff, 0x000f000e,
+ 0x2496, 0xffffffff, 0x00100013,
+ 0x2497, 0xffffffff, 0x00120011,
+ 0x2498, 0xffffffff, 0x00150014,
+ 0x2499, 0xffffffff, 0x000e000d,
+ 0x249a, 0xffffffff, 0x0010000f,
+ 0x249b, 0xffffffff, 0x00000013,
+ 0x249c, 0xffffffff, 0x00120011,
+ 0x249d, 0xffffffff, 0x00150014,
+ 0x249e, 0xffffffff, 0x000f000e,
+ 0x249f, 0xffffffff, 0x00110010,
+ 0x24a0, 0xffffffff, 0x00000014,
+ 0x24a1, 0xffffffff, 0x00130012,
+ 0x24a2, 0xffffffff, 0x00160015,
+ 0x24a3, 0xffffffff, 0x0010000f,
+ 0x24a4, 0xffffffff, 0x00120011,
+ 0x24a5, 0xffffffff, 0x00000015,
+ 0x24a6, 0xffffffff, 0x00140013,
+ 0x24a7, 0xffffffff, 0x00170016,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_GCPM_GENERAL_3, 0xffffffff, 0x00000080,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ 0x000c, 0xffffffff, 0x0000001c,
+ 0x000d, 0x000f0000, 0x000f0000,
+ 0x0583, 0xffffffff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmMC_CITF_MISC_WR_CG, 0x000c0000, 0x000c0000,
+ mmMC_CITF_MISC_RD_CG, 0x000c0000, 0x000c0000,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ 0x157a, 0x00000001, 0x00000001,
+ mmHDP_MEM_POWER_LS, 0x00000001, 0x00000001,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ 0x3430, 0xfffffff0, 0x00000100,
+ 0x3630, 0xfffffff0, 0x00000100,
+};
+static const u32 pitcairn_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xfffffffc,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ 0x2458, 0xffffffff, 0x00010000,
+ 0x2459, 0xffffffff, 0x00030002,
+ 0x245a, 0xffffffff, 0x00040007,
+ 0x245b, 0xffffffff, 0x00060005,
+ 0x245c, 0xffffffff, 0x00090008,
+ 0x245d, 0xffffffff, 0x00020001,
+ 0x245e, 0xffffffff, 0x00040003,
+ 0x245f, 0xffffffff, 0x00000007,
+ 0x2460, 0xffffffff, 0x00060005,
+ 0x2461, 0xffffffff, 0x00090008,
+ 0x2462, 0xffffffff, 0x00030002,
+ 0x2463, 0xffffffff, 0x00050004,
+ 0x2464, 0xffffffff, 0x00000008,
+ 0x2465, 0xffffffff, 0x00070006,
+ 0x2466, 0xffffffff, 0x000a0009,
+ 0x2467, 0xffffffff, 0x00040003,
+ 0x2468, 0xffffffff, 0x00060005,
+ 0x2469, 0xffffffff, 0x00000009,
+ 0x246a, 0xffffffff, 0x00080007,
+ 0x246b, 0xffffffff, 0x000b000a,
+ 0x246c, 0xffffffff, 0x00050004,
+ 0x246d, 0xffffffff, 0x00070006,
+ 0x246e, 0xffffffff, 0x0008000b,
+ 0x246f, 0xffffffff, 0x000a0009,
+ 0x2470, 0xffffffff, 0x000d000c,
+ 0x2480, 0xffffffff, 0x00090008,
+ 0x2481, 0xffffffff, 0x000b000a,
+ 0x2482, 0xffffffff, 0x000c000f,
+ 0x2483, 0xffffffff, 0x000e000d,
+ 0x2484, 0xffffffff, 0x00110010,
+ 0x2485, 0xffffffff, 0x000a0009,
+ 0x2486, 0xffffffff, 0x000c000b,
+ 0x2487, 0xffffffff, 0x0000000f,
+ 0x2488, 0xffffffff, 0x000e000d,
+ 0x2489, 0xffffffff, 0x00110010,
+ 0x248a, 0xffffffff, 0x000b000a,
+ 0x248b, 0xffffffff, 0x000d000c,
+ 0x248c, 0xffffffff, 0x00000010,
+ 0x248d, 0xffffffff, 0x000f000e,
+ 0x248e, 0xffffffff, 0x00120011,
+ 0x248f, 0xffffffff, 0x000c000b,
+ 0x2490, 0xffffffff, 0x000e000d,
+ 0x2491, 0xffffffff, 0x00000011,
+ 0x2492, 0xffffffff, 0x0010000f,
+ 0x2493, 0xffffffff, 0x00130012,
+ 0x2494, 0xffffffff, 0x000d000c,
+ 0x2495, 0xffffffff, 0x000f000e,
+ 0x2496, 0xffffffff, 0x00100013,
+ 0x2497, 0xffffffff, 0x00120011,
+ 0x2498, 0xffffffff, 0x00150014,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_GCPM_GENERAL_3, 0xffffffff, 0x00000080,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ 0x000c, 0xffffffff, 0x0000001c,
+ 0x000d, 0x000f0000, 0x000f0000,
+ 0x0583, 0xffffffff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ 0x157a, 0x00000001, 0x00000001,
+ mmHDP_MEM_POWER_LS, 0x00000001, 0x00000001,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ 0x3430, 0xfffffff0, 0x00000100,
+ 0x3630, 0xfffffff0, 0x00000100,
+};
+
+static const u32 verde_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xfffffffc,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ 0x2458, 0xffffffff, 0x00010000,
+ 0x2459, 0xffffffff, 0x00030002,
+ 0x245a, 0xffffffff, 0x00040007,
+ 0x245b, 0xffffffff, 0x00060005,
+ 0x245c, 0xffffffff, 0x00090008,
+ 0x245d, 0xffffffff, 0x00020001,
+ 0x245e, 0xffffffff, 0x00040003,
+ 0x245f, 0xffffffff, 0x00000007,
+ 0x2460, 0xffffffff, 0x00060005,
+ 0x2461, 0xffffffff, 0x00090008,
+ 0x2462, 0xffffffff, 0x00030002,
+ 0x2463, 0xffffffff, 0x00050004,
+ 0x2464, 0xffffffff, 0x00000008,
+ 0x2465, 0xffffffff, 0x00070006,
+ 0x2466, 0xffffffff, 0x000a0009,
+ 0x2467, 0xffffffff, 0x00040003,
+ 0x2468, 0xffffffff, 0x00060005,
+ 0x2469, 0xffffffff, 0x00000009,
+ 0x246a, 0xffffffff, 0x00080007,
+ 0x246b, 0xffffffff, 0x000b000a,
+ 0x246c, 0xffffffff, 0x00050004,
+ 0x246d, 0xffffffff, 0x00070006,
+ 0x246e, 0xffffffff, 0x0008000b,
+ 0x246f, 0xffffffff, 0x000a0009,
+ 0x2470, 0xffffffff, 0x000d000c,
+ 0x2480, 0xffffffff, 0x00090008,
+ 0x2481, 0xffffffff, 0x000b000a,
+ 0x2482, 0xffffffff, 0x000c000f,
+ 0x2483, 0xffffffff, 0x000e000d,
+ 0x2484, 0xffffffff, 0x00110010,
+ 0x2485, 0xffffffff, 0x000a0009,
+ 0x2486, 0xffffffff, 0x000c000b,
+ 0x2487, 0xffffffff, 0x0000000f,
+ 0x2488, 0xffffffff, 0x000e000d,
+ 0x2489, 0xffffffff, 0x00110010,
+ 0x248a, 0xffffffff, 0x000b000a,
+ 0x248b, 0xffffffff, 0x000d000c,
+ 0x248c, 0xffffffff, 0x00000010,
+ 0x248d, 0xffffffff, 0x000f000e,
+ 0x248e, 0xffffffff, 0x00120011,
+ 0x248f, 0xffffffff, 0x000c000b,
+ 0x2490, 0xffffffff, 0x000e000d,
+ 0x2491, 0xffffffff, 0x00000011,
+ 0x2492, 0xffffffff, 0x0010000f,
+ 0x2493, 0xffffffff, 0x00130012,
+ 0x2494, 0xffffffff, 0x000d000c,
+ 0x2495, 0xffffffff, 0x000f000e,
+ 0x2496, 0xffffffff, 0x00100013,
+ 0x2497, 0xffffffff, 0x00120011,
+ 0x2498, 0xffffffff, 0x00150014,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_GCPM_GENERAL_3, 0xffffffff, 0x00000080,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ 0x000c, 0xffffffff, 0x0000001c,
+ 0x000d, 0x000f0000, 0x000f0000,
+ 0x0583, 0xffffffff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmMC_CITF_MISC_WR_CG, 0x000c0000, 0x000c0000,
+ mmMC_CITF_MISC_RD_CG, 0x000c0000, 0x000c0000,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ 0x157a, 0x00000001, 0x00000001,
+ mmHDP_MEM_POWER_LS, 0x00000001, 0x00000001,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ 0x3430, 0xfffffff0, 0x00000100,
+ 0x3630, 0xfffffff0, 0x00000100,
+};
+
+static const u32 oland_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xfffffffc,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ 0x2458, 0xffffffff, 0x00010000,
+ 0x2459, 0xffffffff, 0x00030002,
+ 0x245a, 0xffffffff, 0x00040007,
+ 0x245b, 0xffffffff, 0x00060005,
+ 0x245c, 0xffffffff, 0x00090008,
+ 0x245d, 0xffffffff, 0x00020001,
+ 0x245e, 0xffffffff, 0x00040003,
+ 0x245f, 0xffffffff, 0x00000007,
+ 0x2460, 0xffffffff, 0x00060005,
+ 0x2461, 0xffffffff, 0x00090008,
+ 0x2462, 0xffffffff, 0x00030002,
+ 0x2463, 0xffffffff, 0x00050004,
+ 0x2464, 0xffffffff, 0x00000008,
+ 0x2465, 0xffffffff, 0x00070006,
+ 0x2466, 0xffffffff, 0x000a0009,
+ 0x2467, 0xffffffff, 0x00040003,
+ 0x2468, 0xffffffff, 0x00060005,
+ 0x2469, 0xffffffff, 0x00000009,
+ 0x246a, 0xffffffff, 0x00080007,
+ 0x246b, 0xffffffff, 0x000b000a,
+ 0x246c, 0xffffffff, 0x00050004,
+ 0x246d, 0xffffffff, 0x00070006,
+ 0x246e, 0xffffffff, 0x0008000b,
+ 0x246f, 0xffffffff, 0x000a0009,
+ 0x2470, 0xffffffff, 0x000d000c,
+ 0x2471, 0xffffffff, 0x00060005,
+ 0x2472, 0xffffffff, 0x00080007,
+ 0x2473, 0xffffffff, 0x0000000b,
+ 0x2474, 0xffffffff, 0x000a0009,
+ 0x2475, 0xffffffff, 0x000d000c,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_GCPM_GENERAL_3, 0xffffffff, 0x00000080,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ 0x000c, 0xffffffff, 0x0000001c,
+ 0x000d, 0x000f0000, 0x000f0000,
+ 0x0583, 0xffffffff, 0x00000100,
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmMC_CITF_MISC_WR_CG, 0x000c0000, 0x000c0000,
+ mmMC_CITF_MISC_RD_CG, 0x000c0000, 0x000c0000,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ 0x157a, 0x00000001, 0x00000001,
+ mmHDP_MEM_POWER_LS, 0x00000001, 0x00000001,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ 0x3430, 0xfffffff0, 0x00000100,
+ 0x3630, 0xfffffff0, 0x00000100,
+};
+
+static const u32 hainan_mgcg_cgcg_init[] =
+{
+ mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xfffffffc,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
+ mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
+ mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
+ mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
+ mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
+ mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
+ mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ 0x2458, 0xffffffff, 0x00010000,
+ 0x2459, 0xffffffff, 0x00030002,
+ 0x245a, 0xffffffff, 0x00040007,
+ 0x245b, 0xffffffff, 0x00060005,
+ 0x245c, 0xffffffff, 0x00090008,
+ 0x245d, 0xffffffff, 0x00020001,
+ 0x245e, 0xffffffff, 0x00040003,
+ 0x245f, 0xffffffff, 0x00000007,
+ 0x2460, 0xffffffff, 0x00060005,
+ 0x2461, 0xffffffff, 0x00090008,
+ 0x2462, 0xffffffff, 0x00030002,
+ 0x2463, 0xffffffff, 0x00050004,
+ 0x2464, 0xffffffff, 0x00000008,
+ 0x2465, 0xffffffff, 0x00070006,
+ 0x2466, 0xffffffff, 0x000a0009,
+ 0x2467, 0xffffffff, 0x00040003,
+ 0x2468, 0xffffffff, 0x00060005,
+ 0x2469, 0xffffffff, 0x00000009,
+ 0x246a, 0xffffffff, 0x00080007,
+ 0x246b, 0xffffffff, 0x000b000a,
+ 0x246c, 0xffffffff, 0x00050004,
+ 0x246d, 0xffffffff, 0x00070006,
+ 0x246e, 0xffffffff, 0x0008000b,
+ 0x246f, 0xffffffff, 0x000a0009,
+ 0x2470, 0xffffffff, 0x000d000c,
+ 0x2471, 0xffffffff, 0x00060005,
+ 0x2472, 0xffffffff, 0x00080007,
+ 0x2473, 0xffffffff, 0x0000000b,
+ 0x2474, 0xffffffff, 0x000a0009,
+ 0x2475, 0xffffffff, 0x000d000c,
+ mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
+ mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
+ mmRLC_GCPM_GENERAL_3, 0xffffffff, 0x00000080,
+ mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
+ 0x000c, 0xffffffff, 0x0000001c,
+ 0x000d, 0x000f0000, 0x000f0000,
+ 0x0583, 0xffffffff, 0x00000100,
+ 0x0409, 0xffffffff, 0x00000100,
+ mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104,
+ mmMC_CITF_MISC_WR_CG, 0x000c0000, 0x000c0000,
+ mmMC_CITF_MISC_RD_CG, 0x000c0000, 0x000c0000,
+ mmHDP_MEM_POWER_LS, 0x00000001, 0x00000001,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+ mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
+ 0x3430, 0xfffffff0, 0x00000100,
+ 0x3630, 0xfffffff0, 0x00000100,
+};
+
+/* XXX: update when we support VCE */
+#if 0
+/* tahiti, pitcarin, verde */
+static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(tahiti_video_codecs_encode_array),
+ .codec_array = tahiti_video_codecs_encode_array,
+};
+#else
+static const struct amdgpu_video_codecs tahiti_video_codecs_encode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+#endif
+/* oland and hainan don't support encode */
+static const struct amdgpu_video_codecs hainan_video_codecs_encode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+/* tahiti, pitcarin, verde, oland */
+static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 41,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs tahiti_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(tahiti_video_codecs_decode_array),
+ .codec_array = tahiti_video_codecs_decode_array,
+};
+
+/* hainan doesn't support decode */
+static const struct amdgpu_video_codecs hainan_video_codecs_decode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static int si_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ if (encode)
+ *codecs = &tahiti_video_codecs_encode;
+ else
+ *codecs = &tahiti_video_codecs_decode;
+ return 0;
+ case CHIP_OLAND:
+ if (encode)
+ *codecs = &hainan_video_codecs_encode;
+ else
+ *codecs = &tahiti_video_codecs_decode;
+ return 0;
+ case CHIP_HAINAN:
+ if (encode)
+ *codecs = &hainan_video_codecs_encode;
+ else
+ *codecs = &hainan_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 si_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(AMDGPU_PCIE_INDEX, reg);
+ (void)RREG32(AMDGPU_PCIE_INDEX);
+ r = RREG32(AMDGPU_PCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void si_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(AMDGPU_PCIE_INDEX, reg);
+ (void)RREG32(AMDGPU_PCIE_INDEX);
+ WREG32(AMDGPU_PCIE_DATA, v);
+ (void)RREG32(AMDGPU_PCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+ (void)RREG32(PCIE_PORT_INDEX);
+ r = RREG32(PCIE_PORT_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void si_pciep_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(PCIE_PORT_INDEX, ((reg) & 0xff));
+ (void)RREG32(PCIE_PORT_INDEX);
+ WREG32(PCIE_PORT_DATA, (v));
+ (void)RREG32(PCIE_PORT_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(SMC_IND_INDEX_0, (reg));
+ r = RREG32(SMC_IND_DATA_0);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(SMC_IND_INDEX_0, (reg));
+ WREG32(SMC_IND_DATA_0, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
+static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(mmUVD_CTX_DATA);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(mmUVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
+static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
+ {GRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {GB_ADDR_CONFIG},
+ {MC_ARB_RAMCFG},
+ {GB_TILE_MODE0},
+ {GB_TILE_MODE1},
+ {GB_TILE_MODE2},
+ {GB_TILE_MODE3},
+ {GB_TILE_MODE4},
+ {GB_TILE_MODE5},
+ {GB_TILE_MODE6},
+ {GB_TILE_MODE7},
+ {GB_TILE_MODE8},
+ {GB_TILE_MODE9},
+ {GB_TILE_MODE10},
+ {GB_TILE_MODE11},
+ {GB_TILE_MODE12},
+ {GB_TILE_MODE13},
+ {GB_TILE_MODE14},
+ {GB_TILE_MODE15},
+ {GB_TILE_MODE16},
+ {GB_TILE_MODE17},
+ {GB_TILE_MODE18},
+ {GB_TILE_MODE19},
+ {GB_TILE_MODE20},
+ {GB_TILE_MODE21},
+ {GB_TILE_MODE22},
+ {GB_TILE_MODE23},
+ {GB_TILE_MODE24},
+ {GB_TILE_MODE25},
+ {GB_TILE_MODE26},
+ {GB_TILE_MODE27},
+ {GB_TILE_MODE28},
+ {GB_TILE_MODE29},
+ {GB_TILE_MODE30},
+ {GB_TILE_MODE31},
+ {CC_RB_BACKEND_DISABLE, true},
+ {GC_USER_RB_BACKEND_DISABLE, true},
+ {PA_SC_RASTER_CONFIG, true},
+};
+
+static uint32_t si_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ uint32_t val;
+ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
+ unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
+
+ switch (reg_offset) {
+ case mmCC_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
+ case mmGC_USER_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
+ case mmPA_SC_RASTER_CONFIG:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+ } else {
+ unsigned idx;
+
+ switch (reg_offset) {
+ case mmGB_ADDR_CONFIG:
+ return adev->gfx.config.gb_addr_config;
+ case mmMC_ARB_RAMCFG:
+ return adev->gfx.config.mc_arb_ramcfg;
+ case mmGB_TILE_MODE0:
+ case mmGB_TILE_MODE1:
+ case mmGB_TILE_MODE2:
+ case mmGB_TILE_MODE3:
+ case mmGB_TILE_MODE4:
+ case mmGB_TILE_MODE5:
+ case mmGB_TILE_MODE6:
+ case mmGB_TILE_MODE7:
+ case mmGB_TILE_MODE8:
+ case mmGB_TILE_MODE9:
+ case mmGB_TILE_MODE10:
+ case mmGB_TILE_MODE11:
+ case mmGB_TILE_MODE12:
+ case mmGB_TILE_MODE13:
+ case mmGB_TILE_MODE14:
+ case mmGB_TILE_MODE15:
+ case mmGB_TILE_MODE16:
+ case mmGB_TILE_MODE17:
+ case mmGB_TILE_MODE18:
+ case mmGB_TILE_MODE19:
+ case mmGB_TILE_MODE20:
+ case mmGB_TILE_MODE21:
+ case mmGB_TILE_MODE22:
+ case mmGB_TILE_MODE23:
+ case mmGB_TILE_MODE24:
+ case mmGB_TILE_MODE25:
+ case mmGB_TILE_MODE26:
+ case mmGB_TILE_MODE27:
+ case mmGB_TILE_MODE28:
+ case mmGB_TILE_MODE29:
+ case mmGB_TILE_MODE30:
+ case mmGB_TILE_MODE31:
+ idx = (reg_offset - mmGB_TILE_MODE0);
+ return adev->gfx.config.tile_mode_array[idx];
+ default:
+ return RREG32(reg_offset);
+ }
+ }
+}
+static int si_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) {
+ bool indexed = si_allowed_read_registers[i].grbm_indexed;
+
+ if (reg_offset != si_allowed_read_registers[i].reg_offset)
+ continue;
+
+ *value = si_get_register_value(adev, indexed, se_num, sh_num,
+ reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static bool si_read_disabled_bios(struct amdgpu_device *adev)
+{
+ u32 bus_cntl;
+ u32 d1vga_control = 0;
+ u32 d2vga_control = 0;
+ u32 vga_render_control = 0;
+ u32 rom_cntl;
+ bool r;
+
+ bus_cntl = RREG32(R600_BUS_CNTL);
+ if (adev->mode_info.num_crtc) {
+ d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
+ d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
+ vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ }
+ rom_cntl = RREG32(R600_ROM_CNTL);
+
+ /* enable the rom */
+ WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS));
+ if (adev->mode_info.num_crtc) {
+ /* Disable VGA mode */
+ WREG32(AVIVO_D1VGA_CONTROL,
+ (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+ AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+ WREG32(AVIVO_D2VGA_CONTROL,
+ (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+ AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+ WREG32(VGA_RENDER_CONTROL,
+ (vga_render_control & C_000300_VGA_VSTATUS_CNTL));
+ }
+ WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE);
+
+ r = amdgpu_read_bios(adev);
+
+ /* restore regs */
+ WREG32(R600_BUS_CNTL, bus_cntl);
+ if (adev->mode_info.num_crtc) {
+ WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
+ WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
+ WREG32(VGA_RENDER_CONTROL, vga_render_control);
+ }
+ WREG32(R600_ROM_CNTL, rom_cntl);
+ return r;
+}
+
+#define mmROM_INDEX 0x2A
+#define mmROM_DATA 0x2B
+
+static bool si_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+ /* set rom index to 0 */
+ WREG32(mmROM_INDEX, 0);
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(mmROM_DATA);
+
+ return true;
+}
+
+static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
+{
+ u32 tmp, i;
+
+ tmp = RREG32(CG_SPLL_FUNC_CNTL);
+ tmp |= SPLL_BYPASS_EN;
+ WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+ tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+ tmp |= SPLL_CTLREQ_CHG;
+ WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+ break;
+ udelay(1);
+ }
+
+ tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
+ tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
+ WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+
+ tmp = RREG32(MPLL_CNTL_MODE);
+ tmp &= ~MPLL_MCLK_SEL;
+ WREG32(MPLL_CNTL_MODE, tmp);
+}
+
+static void si_spll_powerdown(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32(SPLL_CNTL_MODE);
+ tmp |= SPLL_SW_DIR_CONTROL;
+ WREG32(SPLL_CNTL_MODE, tmp);
+
+ tmp = RREG32(CG_SPLL_FUNC_CNTL);
+ tmp |= SPLL_RESET;
+ WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+ tmp = RREG32(CG_SPLL_FUNC_CNTL);
+ tmp |= SPLL_SLEEP;
+ WREG32(CG_SPLL_FUNC_CNTL, tmp);
+
+ tmp = RREG32(SPLL_CNTL_MODE);
+ tmp &= ~SPLL_SW_DIR_CONTROL;
+ WREG32(SPLL_CNTL_MODE, tmp);
+}
+
+static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int r = -EINVAL;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* set mclk/sclk to bypass */
+ si_set_clk_bypass_mode(adev);
+ /* powerdown spll */
+ si_spll_powerdown(adev);
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ /* reset */
+ amdgpu_device_pci_config_reset(adev);
+
+ udelay(100);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+ /* enable BM */
+ pci_set_master(adev->pdev);
+ adev->has_hw_reset = true;
+ r = 0;
+ break;
+ }
+ udelay(1);
+ }
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return r;
+}
+
+static bool si_asic_supports_baco(struct amdgpu_device *adev)
+{
+ return false;
+}
+
+static enum amd_reset_method
+si_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_PCI)
+ return amdgpu_reset_method;
+ else if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
+ amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+static int si_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ switch (si_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ r = amdgpu_device_pci_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "PCI CONFIG reset\n");
+ r = si_gpu_pci_config_reset(adev);
+ break;
+ }
+
+ return r;
+}
+
+static u32 si_get_config_memsize(struct amdgpu_device *adev)
+{
+ return RREG32(mmCONFIG_MEMSIZE);
+}
+
+static void si_vga_set_state(struct amdgpu_device *adev, bool state)
+{
+ uint32_t temp;
+
+ temp = RREG32(CONFIG_CNTL);
+ if (!state) {
+ temp &= ~(1<<0);
+ temp |= (1<<1);
+ } else {
+ temp &= ~(1<<1);
+ }
+ WREG32(CONFIG_CNTL, temp);
+}
+
+static u32 si_get_xclk(struct amdgpu_device *adev)
+{
+ u32 reference_clock = adev->clock.spll.reference_freq;
+ u32 tmp;
+
+ tmp = RREG32(CG_CLKPIN_CNTL_2);
+ if (tmp & MUX_TCLK_TO_XCLK)
+ return TCLK;
+
+ tmp = RREG32(CG_CLKPIN_CNTL);
+ if (tmp & XTALIN_DIVIDE)
+ return reference_clock / 4;
+
+ return reference_clock;
+}
+
+static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void si_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
+static bool si_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
+static bool si_need_reset_on_init(struct amdgpu_device *adev)
+{
+ return false;
+}
+
+static int si_get_pcie_lanes(struct amdgpu_device *adev)
+{
+ u32 link_width_cntl;
+
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+
+ switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+ case LC_LINK_WIDTH_X1:
+ return 1;
+ case LC_LINK_WIDTH_X2:
+ return 2;
+ case LC_LINK_WIDTH_X4:
+ return 4;
+ case LC_LINK_WIDTH_X8:
+ return 8;
+ case LC_LINK_WIDTH_X0:
+ case LC_LINK_WIDTH_X16:
+ default:
+ return 16;
+ }
+}
+
+static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
+{
+ u32 link_width_cntl, mask;
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ switch (lanes) {
+ case 0:
+ mask = LC_LINK_WIDTH_X0;
+ break;
+ case 1:
+ mask = LC_LINK_WIDTH_X1;
+ break;
+ case 2:
+ mask = LC_LINK_WIDTH_X2;
+ break;
+ case 4:
+ mask = LC_LINK_WIDTH_X4;
+ break;
+ case 8:
+ mask = LC_LINK_WIDTH_X8;
+ break;
+ case 16:
+ mask = LC_LINK_WIDTH_X16;
+ break;
+ default:
+ DRM_ERROR("invalid pcie lane request: %d\n", lanes);
+ return;
+ }
+
+ link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl &= ~LC_LINK_WIDTH_MASK;
+ link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
+ link_width_cntl |= (LC_RECONFIG_NOW |
+ LC_RECONFIG_ARC_MISSING_ESCAPE);
+
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+}
+
+static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
+static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
+static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev,
+ unsigned cg_upll_func_cntl)
+{
+ unsigned i;
+
+ /* Make sure UPLL_CTLREQ is deasserted */
+ WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+ mdelay(10);
+
+ /* Assert UPLL_CTLREQ */
+ WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+ /* Wait for CTLACK and CTLACK2 to get asserted */
+ for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
+ uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+
+ if ((RREG32(cg_upll_func_cntl) & mask) == mask)
+ break;
+ mdelay(10);
+ }
+
+ /* Deassert UPLL_CTLREQ */
+ WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK);
+
+ if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
+ DRM_ERROR("Timeout setting UVD clocks!\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq,
+ unsigned target_freq,
+ unsigned pd_min,
+ unsigned pd_even)
+{
+ unsigned post_div = vco_freq / target_freq;
+
+ /* Adjust to post divider minimum value */
+ if (post_div < pd_min)
+ post_div = pd_min;
+
+ /* We alway need a frequency less than or equal the target */
+ if ((vco_freq / post_div) > target_freq)
+ post_div += 1;
+
+ /* Post dividers above a certain value must be even */
+ if (post_div > pd_even && post_div % 2)
+ post_div += 1;
+
+ return post_div;
+}
+
+/**
+ * si_calc_upll_dividers - calc UPLL clock dividers
+ *
+ * @adev: amdgpu_device pointer
+ * @vclk: wanted VCLK
+ * @dclk: wanted DCLK
+ * @vco_min: minimum VCO frequency
+ * @vco_max: maximum VCO frequency
+ * @fb_factor: factor to multiply vco freq with
+ * @fb_mask: limit and bitmask for feedback divider
+ * @pd_min: post divider minimum
+ * @pd_max: post divider maximum
+ * @pd_even: post divider must be even above this value
+ * @optimal_fb_div: resulting feedback divider
+ * @optimal_vclk_div: resulting vclk post divider
+ * @optimal_dclk_div: resulting dclk post divider
+ *
+ * Calculate dividers for UVDs UPLL (except APUs).
+ * Returns zero on success; -EINVAL on error.
+ */
+static int si_calc_upll_dividers(struct amdgpu_device *adev,
+ unsigned vclk, unsigned dclk,
+ unsigned vco_min, unsigned vco_max,
+ unsigned fb_factor, unsigned fb_mask,
+ unsigned pd_min, unsigned pd_max,
+ unsigned pd_even,
+ unsigned *optimal_fb_div,
+ unsigned *optimal_vclk_div,
+ unsigned *optimal_dclk_div)
+{
+ unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq;
+
+ /* Start off with something large */
+ unsigned optimal_score = ~0;
+
+ /* Loop through vco from low to high */
+ vco_min = max(max(vco_min, vclk), dclk);
+ for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
+ uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
+ unsigned vclk_div, dclk_div, score;
+
+ do_div(fb_div, ref_freq);
+
+ /* fb div out of range ? */
+ if (fb_div > fb_mask)
+ break; /* It can oly get worse */
+
+ fb_div &= fb_mask;
+
+ /* Calc vclk divider with current vco freq */
+ vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk,
+ pd_min, pd_even);
+ if (vclk_div > pd_max)
+ break; /* vco is too big, it has to stop */
+
+ /* Calc dclk divider with current vco freq */
+ dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk,
+ pd_min, pd_even);
+ if (dclk_div > pd_max)
+ break; /* vco is too big, it has to stop */
+
+ /* Calc score with current vco freq */
+ score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div);
+
+ /* Determine if this vco setting is better than current optimal settings */
+ if (score < optimal_score) {
+ *optimal_fb_div = fb_div;
+ *optimal_vclk_div = vclk_div;
+ *optimal_dclk_div = dclk_div;
+ optimal_score = score;
+ if (optimal_score == 0)
+ break; /* It can't get better than this */
+ }
+ }
+
+ /* Did we found a valid setup ? */
+ if (optimal_score == ~0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
+ int r;
+
+ /* Bypass vclk and dclk with bclk */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+ ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+ /* Put PLL in bypass mode */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+
+ if (!vclk || !dclk) {
+ /* Keep the Bypass mode */
+ return 0;
+ }
+
+ r = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000,
+ 16384, 0x03FFFFFF, 0, 128, 5,
+ &fb_div, &vclk_div, &dclk_div);
+ if (r)
+ return r;
+
+ /* Set RESET_ANTI_MUX to 0 */
+ WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+ /* Set VCO_MODE to 1 */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
+
+ /* Disable sleep mode */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
+
+ /* Deassert UPLL_RESET */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+ mdelay(1);
+
+ r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
+ if (r)
+ return r;
+
+ /* Assert UPLL_RESET again */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+ /* Disable spread spectrum. */
+ WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+ /* Set feedback divider */
+ WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
+
+ /* Set ref divider to 0 */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
+
+ if (fb_div < 307200)
+ WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
+ else
+ WREG32_P(CG_UPLL_FUNC_CNTL_4,
+ UPLL_SPARE_ISPARE9,
+ ~UPLL_SPARE_ISPARE9);
+
+ /* Set PDIV_A and PDIV_B */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
+ ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
+
+ /* Give the PLL some time to settle */
+ mdelay(15);
+
+ /* Deassert PLL_RESET */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+ mdelay(15);
+
+ /* Switch from bypass mode to normal mode */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+ r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL);
+ if (r)
+ return r;
+
+ /* Switch VCLK and DCLK selection */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+ ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+ mdelay(100);
+
+ return 0;
+}
+
+static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Make sure VCEPLL_CTLREQ is deasserted */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+ mdelay(10);
+
+ /* Assert UPLL_CTLREQ */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+ /* Wait for CTLACK and CTLACK2 to get asserted */
+ for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) {
+ uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+
+ if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+ break;
+ mdelay(10);
+ }
+
+ /* Deassert UPLL_CTLREQ */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+ if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
+ DRM_ERROR("Timeout setting UVD clocks!\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
+ int r;
+
+ /* Bypass evclk and ecclk with bclk */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
+ ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+ /* Put PLL in bypass mode */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
+ ~VCEPLL_BYPASS_EN_MASK);
+
+ if (!evclk || !ecclk) {
+ /* Keep the Bypass mode, put PLL to sleep */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+ ~VCEPLL_SLEEP_MASK);
+ return 0;
+ }
+
+ r = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000,
+ 16384, 0x03FFFFFF, 0, 128, 5,
+ &fb_div, &evclk_div, &ecclk_div);
+ if (r)
+ return r;
+
+ /* Set RESET_ANTI_MUX to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+ /* Set VCO_MODE to 1 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
+ ~VCEPLL_VCO_MODE_MASK);
+
+ /* Toggle VCEPLL_SLEEP to 1 then back to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+ ~VCEPLL_SLEEP_MASK);
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
+
+ /* Deassert VCEPLL_RESET */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+ mdelay(1);
+
+ r = si_vce_send_vcepll_ctlreq(adev);
+ if (r)
+ return r;
+
+ /* Assert VCEPLL_RESET again */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
+
+ /* Disable spread spectrum. */
+ WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+ /* Set feedback divider */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3,
+ VCEPLL_FB_DIV(fb_div),
+ ~VCEPLL_FB_DIV_MASK);
+
+ /* Set ref divider to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
+
+ /* Set PDIV_A and PDIV_B */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
+ ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
+
+ /* Give the PLL some time to settle */
+ mdelay(15);
+
+ /* Deassert PLL_RESET */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+ mdelay(15);
+
+ /* Switch from bypass mode to normal mode */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
+
+ r = si_vce_send_vcepll_ctlreq(adev);
+ if (r)
+ return r;
+
+ /* Switch VCLK and DCLK selection */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
+ ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+ mdelay(100);
+
+ return 0;
+}
+
+static void si_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static const struct amdgpu_asic_funcs si_asic_funcs =
+{
+ .read_disabled_bios = &si_read_disabled_bios,
+ .read_bios_from_rom = &si_read_bios_from_rom,
+ .read_register = &si_read_register,
+ .reset = &si_asic_reset,
+ .reset_method = &si_asic_reset_method,
+ .set_vga_state = &si_vga_set_state,
+ .get_xclk = &si_get_xclk,
+ .set_uvd_clocks = &si_set_uvd_clocks,
+ .set_vce_clocks = &si_set_vce_clocks,
+ .get_pcie_lanes = &si_get_pcie_lanes,
+ .set_pcie_lanes = &si_set_pcie_lanes,
+ .get_config_memsize = &si_get_config_memsize,
+ .flush_hdp = &si_flush_hdp,
+ .invalidate_hdp = &si_invalidate_hdp,
+ .need_full_reset = &si_need_full_reset,
+ .get_pcie_usage = &si_get_pcie_usage,
+ .need_reset_on_init = &si_need_reset_on_init,
+ .get_pcie_replay_count = &si_get_pcie_replay_count,
+ .supports_baco = &si_asic_supports_baco,
+ .pre_asic_init = &si_pre_asic_init,
+ .query_video_codecs = &si_query_video_codecs,
+};
+
+static uint32_t si_get_rev_id(struct amdgpu_device *adev)
+{
+ return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
+}
+
+static int si_common_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->smc_rreg = &si_smc_rreg;
+ adev->smc_wreg = &si_smc_wreg;
+ adev->pcie_rreg = &si_pcie_rreg;
+ adev->pcie_wreg = &si_pcie_wreg;
+ adev->pciep_rreg = &si_pciep_rreg;
+ adev->pciep_wreg = &si_pciep_wreg;
+ adev->uvd_ctx_rreg = si_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = si_uvd_ctx_wreg;
+ adev->didt_rreg = NULL;
+ adev->didt_wreg = NULL;
+
+ adev->asic_funcs = &si_asic_funcs;
+
+ adev->rev_id = si_get_rev_id(adev);
+ adev->external_rev_id = 0xFF;
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = (adev->rev_id == 0) ? 1 :
+ (adev->rev_id == 1) ? 5 : 6;
+ break;
+ case CHIP_PITCAIRN:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 20;
+ break;
+
+ case CHIP_VERDE:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ //???
+ adev->external_rev_id = adev->rev_id + 40;
+ break;
+ case CHIP_OLAND:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 60;
+ break;
+ case CHIP_HAINAN:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 70;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int si_common_sw_init(void *handle)
+{
+ return 0;
+}
+
+static int si_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+
+static void si_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ amdgpu_device_program_register_sequence(adev,
+ tahiti_golden_registers,
+ ARRAY_SIZE(tahiti_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ tahiti_golden_rlc_registers,
+ ARRAY_SIZE(tahiti_golden_rlc_registers));
+ amdgpu_device_program_register_sequence(adev,
+ tahiti_mgcg_cgcg_init,
+ ARRAY_SIZE(tahiti_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ tahiti_golden_registers2,
+ ARRAY_SIZE(tahiti_golden_registers2));
+ break;
+ case CHIP_PITCAIRN:
+ amdgpu_device_program_register_sequence(adev,
+ pitcairn_golden_registers,
+ ARRAY_SIZE(pitcairn_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ pitcairn_golden_rlc_registers,
+ ARRAY_SIZE(pitcairn_golden_rlc_registers));
+ amdgpu_device_program_register_sequence(adev,
+ pitcairn_mgcg_cgcg_init,
+ ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
+ break;
+ case CHIP_VERDE:
+ amdgpu_device_program_register_sequence(adev,
+ verde_golden_registers,
+ ARRAY_SIZE(verde_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ verde_golden_rlc_registers,
+ ARRAY_SIZE(verde_golden_rlc_registers));
+ amdgpu_device_program_register_sequence(adev,
+ verde_mgcg_cgcg_init,
+ ARRAY_SIZE(verde_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ verde_pg_init,
+ ARRAY_SIZE(verde_pg_init));
+ break;
+ case CHIP_OLAND:
+ amdgpu_device_program_register_sequence(adev,
+ oland_golden_registers,
+ ARRAY_SIZE(oland_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ oland_golden_rlc_registers,
+ ARRAY_SIZE(oland_golden_rlc_registers));
+ amdgpu_device_program_register_sequence(adev,
+ oland_mgcg_cgcg_init,
+ ARRAY_SIZE(oland_mgcg_cgcg_init));
+ break;
+ case CHIP_HAINAN:
+ amdgpu_device_program_register_sequence(adev,
+ hainan_golden_registers,
+ ARRAY_SIZE(hainan_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ hainan_golden_registers2,
+ ARRAY_SIZE(hainan_golden_registers2));
+ amdgpu_device_program_register_sequence(adev,
+ hainan_mgcg_cgcg_init,
+ ARRAY_SIZE(hainan_mgcg_cgcg_init));
+ break;
+
+
+ default:
+ BUG();
+ }
+}
+
+static void si_pcie_gen3_enable(struct amdgpu_device *adev)
+{
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 speed_cntl, current_data_rate;
+ int i;
+ u16 tmp16;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ if (amdgpu_pcie_gen2 == 0)
+ return;
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
+ return;
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+ LC_CURRENT_DATA_RATE_SHIFT;
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ if (current_data_rate == 2) {
+ DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
+ if (current_data_rate == 1) {
+ DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ }
+
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
+ return;
+
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ if (current_data_rate != 2) {
+ u16 bridge_cfg, gpu_cfg;
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(PCIE_LC_STATUS1);
+ max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+ current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+ if (current_lw < max_lw) {
+ tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & LC_RENEGOTIATION_SUPPORT) {
+ tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+ tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+ tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ }
+ }
+
+ for (i = 0; i < 10; i++) {
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
+ if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+ break;
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp |= LC_REDO_EQ;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+ mdelay(100);
+
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+ tmp &= ~LC_SET_QUIESCE;
+ WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ }
+ }
+ }
+
+ speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+ speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
+ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
+ else
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+ WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static inline u32 si_pif_phy0_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+ r = RREG32(EVERGREEN_PIF_PHY0_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static inline void si_pif_phy0_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+ WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static inline u32 si_pif_phy1_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+ r = RREG32(EVERGREEN_PIF_PHY1_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static inline void si_pif_phy1_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+ WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+static void si_program_aspm(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ data &= ~LC_XMIT_N_FTS_MASK;
+ data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+ data |= LC_GO_TO_RECOVERY;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE(PCIE_P_CNTL);
+ data |= P_IGNORE_EDB_ERR;
+ if (orig != data)
+ WREG32_PCIE(PCIE_P_CNTL, data);
+
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+ data |= LC_PMI_TO_L1_DIS;
+ if (!disable_l0s)
+ data |= LC_L0S_INACTIVITY(7);
+
+ if (!disable_l1) {
+ data |= LC_L1_INACTIVITY(7);
+ data &= ~LC_PMI_TO_L1_DIS;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+ if (!disable_plloff_in_l1) {
+ bool clk_req_support;
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
+ data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
+ data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+
+ if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
+ data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
+ data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2);
+ data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data);
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3);
+ data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
+ data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
+ data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2);
+ data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3);
+ data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data);
+ }
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+ data |= LC_DYN_LANES_PWR_STATE(3);
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
+ data &= ~LS2_EXIT_TIME_MASK;
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
+ data |= LS2_EXIT_TIME(5);
+ if (orig != data)
+ si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
+
+ orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
+ data &= ~LS2_EXIT_TIME_MASK;
+ if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
+ data |= LS2_EXIT_TIME(5);
+ if (orig != data)
+ si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
+
+ if (!disable_clkreq &&
+ !pci_is_root_bus(adev->pdev->bus)) {
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 lnkcap;
+
+ clk_req_support = false;
+ pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+ if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+ clk_req_support = true;
+ } else {
+ clk_req_support = false;
+ }
+
+ if (clk_req_support) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+ data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+ orig = data = RREG32(THM_CLK_CNTL);
+ data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+ data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ if (orig != data)
+ WREG32(THM_CLK_CNTL, data);
+
+ orig = data = RREG32(MISC_CLK_CNTL);
+ data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+ data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ if (orig != data)
+ WREG32(MISC_CLK_CNTL, data);
+
+ orig = data = RREG32(CG_CLKPIN_CNTL);
+ data &= ~BCLK_AS_XCLK;
+ if (orig != data)
+ WREG32(CG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32(CG_CLKPIN_CNTL_2);
+ data &= ~FORCE_BIF_REFCLK_EN;
+ if (orig != data)
+ WREG32(CG_CLKPIN_CNTL_2, data);
+
+ orig = data = RREG32(MPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_CLKOUT_SEL_MASK;
+ data |= MPLL_CLKOUT_SEL(4);
+ if (orig != data)
+ WREG32(MPLL_BYPASSCLK_SEL, data);
+
+ orig = data = RREG32(SPLL_CNTL_MODE);
+ data &= ~SPLL_REFCLK_SEL_MASK;
+ if (orig != data)
+ WREG32(SPLL_CNTL_MODE, data);
+ }
+ }
+ } else {
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+
+ orig = data = RREG32_PCIE(PCIE_CNTL2);
+ data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ if (orig != data)
+ WREG32_PCIE(PCIE_CNTL2, data);
+
+ if (!disable_l0s) {
+ data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+ if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+ data = RREG32_PCIE(PCIE_LC_STATUS1);
+ if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+ orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+ data &= ~LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ }
+ }
+ }
+}
+
+static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev)
+{
+ int readrq;
+ u16 v;
+
+ readrq = pcie_get_readrq(adev->pdev);
+ v = ffs(readrq) - 8;
+ if ((v == 0) || (v == 6) || (v == 7))
+ pcie_set_readrq(adev->pdev, 512);
+}
+
+static int si_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ si_fix_pci_max_read_req_size(adev);
+ si_init_golden_registers(adev);
+ si_pcie_gen3_enable(adev);
+ si_program_aspm(adev);
+
+ return 0;
+}
+
+static int si_common_hw_fini(void *handle)
+{
+ return 0;
+}
+
+static int si_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_common_hw_fini(adev);
+}
+
+static int si_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_common_hw_init(adev);
+}
+
+static bool si_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int si_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int si_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int si_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int si_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs si_common_ip_funcs = {
+ .name = "si_common",
+ .early_init = si_common_early_init,
+ .late_init = NULL,
+ .sw_init = si_common_sw_init,
+ .sw_fini = si_common_sw_fini,
+ .hw_init = si_common_hw_init,
+ .hw_fini = si_common_hw_fini,
+ .suspend = si_common_suspend,
+ .resume = si_common_resume,
+ .is_idle = si_common_is_idle,
+ .wait_for_idle = si_common_wait_for_idle,
+ .soft_reset = si_common_soft_reset,
+ .set_clockgating_state = si_common_set_clockgating_state,
+ .set_powergating_state = si_common_set_powergating_state,
+};
+
+static const struct amdgpu_ip_block_version si_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &si_common_ip_funcs,
+};
+
+int si_set_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
+ /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
+ break;
+ case CHIP_OLAND:
+ amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
+ /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
+ break;
+ case CHIP_HAINAN:
+ amdgpu_device_ip_block_add(adev, &si_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ break;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/si.h b/drivers/gpu/drm/amd/amdgpu/si.h
new file mode 100644
index 000000000..06ed7212a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SI_H__
+#define __SI_H__
+
+#define SI_FLUSH_GPU_TLB_NUM_WREG 2
+
+void si_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+int si_set_ip_blocks(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
new file mode 100644
index 000000000..42c4547f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "si.h"
+#include "sid.h"
+
+const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+{
+ DMA0_REGISTER_OFFSET,
+ DMA1_REGISTER_OFFSET
+};
+
+static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
+static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
+static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
+
+static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
+
+ return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+}
+
+static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
+
+ WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
+}
+
+static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((lower_32_bits(ring->wptr) & 7) != 5)
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
+ amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * si_dma_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (VI).
+ */
+static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ amdgpu_ring_write(ring, seq);
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
+}
+
+static void si_dma_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl;
+ unsigned i;
+
+ amdgpu_sdma_unset_buffer_funcs_helper(adev);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* dma0 */
+ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ }
+}
+
+static int si_dma_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
+ int i, r;
+ uint64_t rptr_addr;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+ WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
+
+ rptr_addr = ring->rptr_gpu_addr;
+
+ WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
+ WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
+
+ rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+ WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+
+ /* enable DMA IBs */
+ ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+#ifdef __BIG_ENDIAN
+ ib_cntl |= DMA_IB_SWAP_ENABLE;
+#endif
+ WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
+
+ dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
+ dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+ WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
+
+ ring->wptr = 0;
+ WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * si_dma_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (VI).
+ * Returns 0 for success, error for failure.
+ */
+static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 4);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * si_dma_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (VI).
+ * Returns 0 on success, error on failure.
+ */
+static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
+ ib.ptr[3] = 0xDEADBEEF;
+ ib.length_dw = 4;
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * si_dma_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using DMA (SI).
+ */
+static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
+ 1, 0, 0, bytes);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+ ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
+}
+
+/**
+ * si_dma_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using DMA (SI).
+ */
+static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * si_dma_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ uint64_t value;
+ unsigned ndw;
+
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ if (flags & AMDGPU_PTE_VALID)
+ value = addr;
+ else
+ value = 0;
+
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+ ib->ptr[ib->length_dw++] = pe; /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = value; /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ pe += ndw * 4;
+ addr += (ndw / 2) * incr;
+ count -= ndw / 2;
+ }
+}
+
+/**
+ * si_dma_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ while (ib->length_dw & 0x7)
+ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
+}
+
+/**
+ * si_dma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
+ (1 << 27)); /* Poll memory */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, seq); /* value */
+ amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
+}
+
+/**
+ * si_dma_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (VI).
+ */
+static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for invalidate to complete */
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
+ amdgpu_ring_write(ring, 0xff << 16); /* retry */
+ amdgpu_ring_write(ring, 1 << vmid); /* mask */
+ amdgpu_ring_write(ring, 0); /* value */
+ amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
+}
+
+static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, (0xf << 16) | reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static int si_dma_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->sdma.num_instances = 2;
+
+ si_dma_set_ring_funcs(adev);
+ si_dma_set_buffer_funcs(adev);
+ si_dma_set_vm_pte_funcs(adev);
+ si_dma_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int si_dma_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* DMA0 trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* DMA1 trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int si_dma_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ return 0;
+}
+
+static int si_dma_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_dma_start(adev);
+}
+
+static int si_dma_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ si_dma_stop(adev);
+
+ return 0;
+}
+
+static int si_dma_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_dma_hw_fini(adev);
+}
+
+static int si_dma_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_dma_hw_init(adev);
+}
+
+static bool si_dma_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(SRBM_STATUS2);
+
+ if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int si_dma_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (si_dma_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int si_dma_soft_reset(void *handle)
+{
+ DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
+ return 0;
+}
+
+static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ switch (type) {
+ case AMDGPU_SDMA_IRQ_INSTANCE0:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl |= TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AMDGPU_SDMA_IRQ_INSTANCE1:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl |= TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int si_dma_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ if (entry->src_id == 224)
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ else
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ return 0;
+}
+
+static int si_dma_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ u32 orig, data, offset;
+ int i;
+ bool enable;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ offset = DMA0_REGISTER_OFFSET;
+ else
+ offset = DMA1_REGISTER_OFFSET;
+ orig = data = RREG32(DMA_POWER_CNTL + offset);
+ data &= ~MEM_POWER_OVERRIDE;
+ if (data != orig)
+ WREG32(DMA_POWER_CNTL + offset, data);
+ WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+ }
+ } else {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ offset = DMA0_REGISTER_OFFSET;
+ else
+ offset = DMA1_REGISTER_OFFSET;
+ orig = data = RREG32(DMA_POWER_CNTL + offset);
+ data |= MEM_POWER_OVERRIDE;
+ if (data != orig)
+ WREG32(DMA_POWER_CNTL + offset, data);
+
+ orig = data = RREG32(DMA_CLK_CTRL + offset);
+ data = 0xff000000;
+ if (data != orig)
+ WREG32(DMA_CLK_CTRL + offset, data);
+ }
+ }
+
+ return 0;
+}
+
+static int si_dma_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ u32 tmp;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ WREG32(DMA_PGFSM_WRITE, 0x00002000);
+ WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+
+ for (tmp = 0; tmp < 5; tmp++)
+ WREG32(DMA_PGFSM_WRITE, 0);
+
+ return 0;
+}
+
+static const struct amd_ip_funcs si_dma_ip_funcs = {
+ .name = "si_dma",
+ .early_init = si_dma_early_init,
+ .late_init = NULL,
+ .sw_init = si_dma_sw_init,
+ .sw_fini = si_dma_sw_fini,
+ .hw_init = si_dma_hw_init,
+ .hw_fini = si_dma_hw_fini,
+ .suspend = si_dma_suspend,
+ .resume = si_dma_resume,
+ .is_idle = si_dma_is_idle,
+ .wait_for_idle = si_dma_wait_for_idle,
+ .soft_reset = si_dma_soft_reset,
+ .set_clockgating_state = si_dma_set_clockgating_state,
+ .set_powergating_state = si_dma_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
+ .support_64bit_ptrs = false,
+ .get_rptr = si_dma_ring_get_rptr,
+ .get_wptr = si_dma_ring_get_wptr,
+ .set_wptr = si_dma_ring_set_wptr,
+ .emit_frame_size =
+ 3 + 3 + /* hdp flush / invalidate */
+ 6 + /* si_dma_ring_emit_pipeline_sync */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
+ 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
+ .emit_ib = si_dma_ring_emit_ib,
+ .emit_fence = si_dma_ring_emit_fence,
+ .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
+ .emit_vm_flush = si_dma_ring_emit_vm_flush,
+ .test_ring = si_dma_ring_test_ring,
+ .test_ib = si_dma_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = si_dma_ring_pad_ib,
+ .emit_wreg = si_dma_ring_emit_wreg,
+};
+
+static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
+ .set = si_dma_set_trap_irq_state,
+ .process = si_dma_process_trap_irq,
+};
+
+static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
+}
+
+/**
+ * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: is this a secure operation
+ *
+ * Copy GPU buffers using the DMA engine (VI).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
+ 1, 0, 0, byte_count);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
+}
+
+/**
+ * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (VI).
+ */
+static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
+ 0, 0, 0, byte_count / 4);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
+}
+
+
+static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
+ .copy_max_bytes = 0xffff8,
+ .copy_num_dw = 5,
+ .emit_copy_buffer = si_dma_emit_copy_buffer,
+
+ .fill_max_bytes = 0xffff8,
+ .fill_num_dw = 4,
+ .emit_fill_buffer = si_dma_emit_fill_buffer,
+};
+
+static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &si_dma_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
+ .copy_pte_num_dw = 5,
+ .copy_pte = si_dma_vm_copy_pte,
+
+ .write_pte = si_dma_vm_write_pte,
+ .set_pte_pde = si_dma_vm_set_pte_pde,
+};
+
+static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version si_dma_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &si_dma_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.h b/drivers/gpu/drm/amd/amdgpu/si_dma.h
new file mode 100644
index 000000000..5ac1b8452
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SI_DMA_H__
+#define __SI_DMA_H__
+
+extern const struct amdgpu_ip_block_version si_dma_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
new file mode 100644
index 000000000..4e935baa7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef SI_ENUMS_H
+#define SI_ENUMS_H
+
+#define VBLANK_INT_MASK (1 << 0)
+#define DC_HPDx_INT_EN (1 << 16)
+#define VBLANK_ACK (1 << 4)
+#define VLINE_ACK (1 << 4)
+
+#define CURSOR_WIDTH 64
+#define CURSOR_HEIGHT 64
+
+#define VGA_VSTATUS_CNTL 0xFFFCFFFF
+#define PRIORITY_MARK_MASK 0x7fff
+#define PRIORITY_OFF (1 << 16)
+#define PRIORITY_ALWAYS_ON (1 << 20)
+#define INTERLEAVE_EN (1 << 0)
+
+#define LATENCY_WATERMARK_MASK(x) ((x) << 16)
+#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
+#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
+
+#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
+#define GRPH_ENDIAN_NONE 0
+#define GRPH_ENDIAN_8IN16 1
+#define GRPH_ENDIAN_8IN32 2
+#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
+
+#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
+#define GRPH_DEPTH_8BPP 0
+#define GRPH_DEPTH_16BPP 1
+#define GRPH_DEPTH_32BPP 2
+
+#define GRPH_FORMAT(x) (((x) & 0x7) << 8)
+#define GRPH_FORMAT_INDEXED 0
+#define GRPH_FORMAT_ARGB1555 0
+#define GRPH_FORMAT_ARGB565 1
+#define GRPH_FORMAT_ARGB4444 2
+#define GRPH_FORMAT_AI88 3
+#define GRPH_FORMAT_MONO16 4
+#define GRPH_FORMAT_BGRA5551 5
+#define GRPH_FORMAT_ARGB8888 0
+#define GRPH_FORMAT_ARGB2101010 1
+#define GRPH_FORMAT_32BPP_DIG 2
+#define GRPH_FORMAT_8B_ARGB2101010 3
+#define GRPH_FORMAT_BGRA1010102 4
+#define GRPH_FORMAT_8B_BGRA1010102 5
+#define GRPH_FORMAT_RGB111110 6
+#define GRPH_FORMAT_BGR101111 7
+
+#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
+#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
+#define GRPH_ARRAY_LINEAR_GENERAL 0
+#define GRPH_ARRAY_LINEAR_ALIGNED 1
+#define GRPH_ARRAY_1D_TILED_THIN1 2
+#define GRPH_ARRAY_2D_TILED_THIN1 4
+#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
+#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
+#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
+#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
+#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
+#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24)
+
+#define CURSOR_EN (1 << 0)
+#define CURSOR_MODE(x) (((x) & 0x3) << 8)
+#define CURSOR_MONO 0
+#define CURSOR_24_1 1
+#define CURSOR_24_8_PRE_MULT 2
+#define CURSOR_24_8_UNPRE_MULT 3
+#define CURSOR_2X_MAGNIFY (1 << 16)
+#define CURSOR_FORCE_MC_ON (1 << 20)
+#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
+#define CURSOR_URGENT_ALWAYS 0
+#define CURSOR_URGENT_1_8 1
+#define CURSOR_URGENT_1_4 2
+#define CURSOR_URGENT_3_8 3
+#define CURSOR_URGENT_1_2 4
+#define CURSOR_UPDATE_PENDING (1 << 0)
+#define CURSOR_UPDATE_TAKEN (1 << 1)
+#define CURSOR_UPDATE_LOCK (1 << 16)
+#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
+
+#define SI_CRTC0_REGISTER_OFFSET 0
+#define SI_CRTC1_REGISTER_OFFSET 0x300
+#define SI_CRTC2_REGISTER_OFFSET 0x2600
+#define SI_CRTC3_REGISTER_OFFSET 0x2900
+#define SI_CRTC4_REGISTER_OFFSET 0x2c00
+#define SI_CRTC5_REGISTER_OFFSET 0x2f00
+
+#define DMA0_REGISTER_OFFSET 0x000
+#define DMA1_REGISTER_OFFSET 0x200
+#define ES_AND_GS_AUTO 3
+#define RADEON_PACKET_TYPE3 3
+#define CE_PARTITION_BASE 3
+#define BUF_SWAP_32BIT (2 << 16)
+
+#define GFX_POWER_STATUS (1 << 1)
+#define GFX_CLOCK_STATUS (1 << 2)
+#define GFX_LS_STATUS (1 << 3)
+#define RLC_BUSY_STATUS (1 << 0)
+
+#define RLC_PUD(x) ((x) << 0)
+#define RLC_PUD_MASK (0xff << 0)
+#define RLC_PDD(x) ((x) << 8)
+#define RLC_PDD_MASK (0xff << 8)
+#define RLC_TTPD(x) ((x) << 16)
+#define RLC_TTPD_MASK (0xff << 16)
+#define RLC_MSD(x) ((x) << 24)
+#define RLC_MSD_MASK (0xff << 24)
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
+#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003
+
+#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ALLOC_GDS 0x1B
+#define PACKET3_WRITE_GDS_RAM 0x1C
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_DRAW_INDEX_IMMD 0x2E
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x31
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+#define PACKET3_MPEG_INDEX 0x3A
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define PACKET3_MEM_WRITE 0x3D
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
+# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_DEST_BASE_0_ENA (1 << 0)
+# define PACKET3_DEST_BASE_1_ENA (1 << 1)
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
+# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
+# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
+# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
+# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
+# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
+# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
+# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
+# define PACKET3_DEST_BASE_2_ENA (1 << 19)
+# define PACKET3_DEST_BASE_3_ENA (1 << 21)
+# define PACKET3_TCL1_ACTION_ENA (1 << 22)
+# define PACKET3_TC_ACTION_ENA (1 << 23)
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_ONE_REG_WRITE 0x57
+#define PACKET3_LOAD_CONFIG_REG 0x5F
+#define PACKET3_LOAD_CONTEXT_REG 0x60
+#define PACKET3_LOAD_SH_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_RESOURCE_INDIRECT 0x74
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_ME_WRITE 0x7A
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_CE_WRITE 0x7F
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER 0x87
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SET_CE_DE_COUNTERS 0x89
+#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
+#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
new file mode 100644
index 000000000..9a24f17a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "sid.h"
+#include "si_ih.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+static void si_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(IH_CNTL);
+ u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
+
+ ih_cntl |= ENABLE_INTR;
+ ih_rb_cntl |= IH_RB_ENABLE;
+ WREG32(IH_CNTL, ih_cntl);
+ WREG32(IH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+static void si_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
+ u32 ih_cntl = RREG32(IH_CNTL);
+
+ ih_rb_cntl &= ~IH_RB_ENABLE;
+ ih_cntl &= ~ENABLE_INTR;
+ WREG32(IH_RB_CNTL, ih_rb_cntl);
+ WREG32(IH_CNTL, ih_cntl);
+ WREG32(IH_RB_RPTR, 0);
+ WREG32(IH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+static int si_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int rb_bufsz;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+
+ si_ih_disable_interrupts(adev);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(INTERRUPT_CNTL);
+ interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
+ interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
+ WREG32(INTERRUPT_CNTL, interrupt_cntl);
+
+ WREG32(IH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+
+ ih_rb_cntl = IH_WPTR_OVERFLOW_ENABLE |
+ IH_WPTR_OVERFLOW_CLEAR |
+ (rb_bufsz << 1) |
+ IH_WPTR_WRITEBACK_ENABLE;
+
+ WREG32(IH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+ WREG32(IH_RB_CNTL, ih_rb_cntl);
+ WREG32(IH_RB_RPTR, 0);
+ WREG32(IH_RB_WPTR, 0);
+
+ ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
+ if (adev->irq.msi_enabled)
+ ih_cntl |= RPTR_REARM;
+ WREG32(IH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+ si_ih_enable_interrupts(adev);
+
+ return 0;
+}
+
+static void si_ih_irq_disable(struct amdgpu_device *adev)
+{
+ si_ih_disable_interrupts(adev);
+ mdelay(1);
+}
+
+static u32 si_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
+ wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(IH_RB_CNTL);
+ tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
+ }
+ return (wptr & ih->ptr_mask);
+}
+
+static void si_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+
+ ih->rptr += 16;
+}
+
+static void si_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(IH_RB_RPTR, ih->rptr);
+}
+
+static int si_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ si_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int si_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ return amdgpu_irq_init(adev);
+}
+
+static int si_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int si_ih_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_ih_irq_init(adev);
+}
+
+static int si_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ si_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int si_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_ih_hw_fini(adev);
+}
+
+static int si_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return si_ih_hw_init(adev);
+}
+
+static bool si_ih_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(SRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ return false;
+
+ return true;
+}
+
+static int si_ih_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (si_ih_is_idle(handle))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int si_ih_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(SRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(SRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(SRBM_SOFT_RESET, tmp);
+ tmp = RREG32(SRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(SRBM_SOFT_RESET, tmp);
+ tmp = RREG32(SRBM_SOFT_RESET);
+
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int si_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int si_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs si_ih_ip_funcs = {
+ .name = "si_ih",
+ .early_init = si_ih_early_init,
+ .late_init = NULL,
+ .sw_init = si_ih_sw_init,
+ .sw_fini = si_ih_sw_fini,
+ .hw_init = si_ih_hw_init,
+ .hw_fini = si_ih_hw_fini,
+ .suspend = si_ih_suspend,
+ .resume = si_ih_resume,
+ .is_idle = si_ih_is_idle,
+ .wait_for_idle = si_ih_wait_for_idle,
+ .soft_reset = si_ih_soft_reset,
+ .set_clockgating_state = si_ih_set_clockgating_state,
+ .set_powergating_state = si_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs si_ih_funcs = {
+ .get_wptr = si_ih_get_wptr,
+ .decode_iv = si_ih_decode_iv,
+ .set_rptr = si_ih_set_rptr
+};
+
+static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &si_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version si_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &si_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.h b/drivers/gpu/drm/amd/amdgpu/si_ih.h
new file mode 100644
index 000000000..42e64a53e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SI_IH_H__
+#define __SI_IH_H__
+
+extern const struct amdgpu_ip_block_version si_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
new file mode 100644
index 000000000..9a39cbfe6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -0,0 +1,2494 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef SI_H
+#define SI_H
+
+#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define SI_MAX_SH_GPRS 256
+#define SI_MAX_TEMP_GPRS 16
+#define SI_MAX_SH_THREADS 256
+#define SI_MAX_SH_STACK_ENTRIES 4096
+#define SI_MAX_FRC_EOV_CNT 16384
+#define SI_MAX_BACKENDS 8
+#define SI_MAX_BACKENDS_MASK 0xFF
+#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F
+#define SI_MAX_SIMDS 12
+#define SI_MAX_SIMDS_MASK 0x0FFF
+#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF
+#define SI_MAX_PIPES 8
+#define SI_MAX_PIPES_MASK 0xFF
+#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F
+#define SI_MAX_LDS_NUM 0xFFFF
+#define SI_MAX_TCC 16
+#define SI_MAX_TCC_MASK 0xFFFF
+#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
+
+/* SMC IND accessor regs */
+#define SMC_IND_INDEX_0 0x80
+#define SMC_IND_DATA_0 0x81
+
+#define SMC_IND_ACCESS_CNTL 0x8A
+# define AUTO_INCREMENT_IND_0 (1 << 0)
+#define SMC_MESSAGE_0 0x8B
+#define SMC_RESP_0 0x8C
+
+/* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */
+#define SMC_CG_IND_START 0xc0030000
+#define SMC_CG_IND_END 0xc0040000
+
+#define CG_CGTT_LOCAL_0 0x400
+#define CG_CGTT_LOCAL_1 0x401
+
+/* SMC IND registers */
+#define SMC_SYSCON_RESET_CNTL 0x80000000
+# define RST_REG (1 << 0)
+#define SMC_SYSCON_CLOCK_CNTL_0 0x80000004
+# define CK_DISABLE (1 << 0)
+# define CKEN (1 << 24)
+
+#define VGA_HDP_CONTROL 0xCA
+#define VGA_MEMORY_DISABLE (1 << 4)
+
+#define DCCG_DISP_SLOW_SELECT_REG 0x13F
+#define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0)
+#define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0)
+#define DCCG_DISP1_SLOW_SELECT_SHIFT 0
+#define DCCG_DISP2_SLOW_SELECT(x) ((x) << 4)
+#define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4)
+#define DCCG_DISP2_SLOW_SELECT_SHIFT 4
+
+#define CG_SPLL_FUNC_CNTL 0x180
+#define SPLL_RESET (1 << 0)
+#define SPLL_SLEEP (1 << 1)
+#define SPLL_BYPASS_EN (1 << 3)
+#define SPLL_REF_DIV(x) ((x) << 4)
+#define SPLL_REF_DIV_MASK (0x3f << 4)
+#define SPLL_PDIV_A(x) ((x) << 20)
+#define SPLL_PDIV_A_MASK (0x7f << 20)
+#define SPLL_PDIV_A_SHIFT 20
+#define CG_SPLL_FUNC_CNTL_2 0x181
+#define SCLK_MUX_SEL(x) ((x) << 0)
+#define SCLK_MUX_SEL_MASK (0x1ff << 0)
+#define SPLL_CTLREQ_CHG (1 << 23)
+#define SCLK_MUX_UPDATE (1 << 26)
+#define CG_SPLL_FUNC_CNTL_3 0x182
+#define SPLL_FB_DIV(x) ((x) << 0)
+#define SPLL_FB_DIV_MASK (0x3ffffff << 0)
+#define SPLL_FB_DIV_SHIFT 0
+#define SPLL_DITHEN (1 << 28)
+#define CG_SPLL_FUNC_CNTL_4 0x183
+
+#define SPLL_STATUS 0x185
+#define SPLL_CHG_STATUS (1 << 1)
+#define SPLL_CNTL_MODE 0x186
+#define SPLL_SW_DIR_CONTROL (1 << 0)
+# define SPLL_REFCLK_SEL(x) ((x) << 26)
+# define SPLL_REFCLK_SEL_MASK (3 << 26)
+
+#define CG_SPLL_SPREAD_SPECTRUM 0x188
+#define SSEN (1 << 0)
+#define CLK_S(x) ((x) << 4)
+#define CLK_S_MASK (0xfff << 4)
+#define CLK_S_SHIFT 4
+#define CG_SPLL_SPREAD_SPECTRUM_2 0x189
+#define CLK_V(x) ((x) << 0)
+#define CLK_V_MASK (0x3ffffff << 0)
+#define CLK_V_SHIFT 0
+
+#define CG_SPLL_AUTOSCALE_CNTL 0x18b
+# define AUTOSCALE_ON_SS_CLEAR (1 << 9)
+
+/* discrete uvd clocks */
+#define CG_UPLL_FUNC_CNTL 0x18d
+# define UPLL_RESET_MASK 0x00000001
+# define UPLL_SLEEP_MASK 0x00000002
+# define UPLL_BYPASS_EN_MASK 0x00000004
+# define UPLL_CTLREQ_MASK 0x00000008
+# define UPLL_VCO_MODE_MASK 0x00000600
+# define UPLL_REF_DIV_MASK 0x003F0000
+# define UPLL_CTLACK_MASK 0x40000000
+# define UPLL_CTLACK2_MASK 0x80000000
+#define CG_UPLL_FUNC_CNTL_2 0x18e
+# define UPLL_PDIV_A(x) ((x) << 0)
+# define UPLL_PDIV_A_MASK 0x0000007F
+# define UPLL_PDIV_B(x) ((x) << 8)
+# define UPLL_PDIV_B_MASK 0x00007F00
+# define VCLK_SRC_SEL(x) ((x) << 20)
+# define VCLK_SRC_SEL_MASK 0x01F00000
+# define DCLK_SRC_SEL(x) ((x) << 25)
+# define DCLK_SRC_SEL_MASK 0x3E000000
+#define CG_UPLL_FUNC_CNTL_3 0x18f
+# define UPLL_FB_DIV(x) ((x) << 0)
+# define UPLL_FB_DIV_MASK 0x01FFFFFF
+#define CG_UPLL_FUNC_CNTL_4 0x191
+# define UPLL_SPARE_ISPARE9 0x00020000
+#define CG_UPLL_FUNC_CNTL_5 0x192
+# define RESET_ANTI_MUX_MASK 0x00000200
+#define CG_UPLL_SPREAD_SPECTRUM 0x194
+# define SSEN_MASK 0x00000001
+
+#define MPLL_BYPASSCLK_SEL 0x197
+# define MPLL_CLKOUT_SEL(x) ((x) << 8)
+# define MPLL_CLKOUT_SEL_MASK 0xFF00
+
+#define CG_CLKPIN_CNTL 0x198
+# define XTALIN_DIVIDE (1 << 1)
+# define BCLK_AS_XCLK (1 << 2)
+#define CG_CLKPIN_CNTL_2 0x199
+# define FORCE_BIF_REFCLK_EN (1 << 3)
+# define MUX_TCLK_TO_XCLK (1 << 8)
+
+#define THM_CLK_CNTL 0x19b
+# define CMON_CLK_SEL(x) ((x) << 0)
+# define CMON_CLK_SEL_MASK 0xFF
+# define TMON_CLK_SEL(x) ((x) << 8)
+# define TMON_CLK_SEL_MASK 0xFF00
+#define MISC_CLK_CNTL 0x19c
+# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0)
+# define DEEP_SLEEP_CLK_SEL_MASK 0xFF
+# define ZCLK_SEL(x) ((x) << 8)
+# define ZCLK_SEL_MASK 0xFF00
+
+#define CG_THERMAL_CTRL 0x1c0
+#define DPM_EVENT_SRC(x) ((x) << 0)
+#define DPM_EVENT_SRC_MASK (7 << 0)
+#define DIG_THERM_DPM(x) ((x) << 14)
+#define DIG_THERM_DPM_MASK 0x003FC000
+#define DIG_THERM_DPM_SHIFT 14
+#define CG_THERMAL_STATUS 0x1c1
+#define FDO_PWM_DUTY(x) ((x) << 9)
+#define FDO_PWM_DUTY_MASK (0xff << 9)
+#define FDO_PWM_DUTY_SHIFT 9
+#define CG_THERMAL_INT 0x1c2
+#define DIG_THERM_INTH(x) ((x) << 8)
+#define DIG_THERM_INTH_MASK 0x0000FF00
+#define DIG_THERM_INTH_SHIFT 8
+#define DIG_THERM_INTL(x) ((x) << 16)
+#define DIG_THERM_INTL_MASK 0x00FF0000
+#define DIG_THERM_INTL_SHIFT 16
+#define THERM_INT_MASK_HIGH (1 << 24)
+#define THERM_INT_MASK_LOW (1 << 25)
+
+#define CG_MULT_THERMAL_CTRL 0x1c4
+#define TEMP_SEL(x) ((x) << 20)
+#define TEMP_SEL_MASK (0xff << 20)
+#define TEMP_SEL_SHIFT 20
+#define CG_MULT_THERMAL_STATUS 0x1c5
+#define ASIC_MAX_TEMP(x) ((x) << 0)
+#define ASIC_MAX_TEMP_MASK 0x000001ff
+#define ASIC_MAX_TEMP_SHIFT 0
+#define CTF_TEMP(x) ((x) << 9)
+#define CTF_TEMP_MASK 0x0003fe00
+#define CTF_TEMP_SHIFT 9
+
+#define CG_FDO_CTRL0 0x1d5
+#define FDO_STATIC_DUTY(x) ((x) << 0)
+#define FDO_STATIC_DUTY_MASK 0x000000FF
+#define FDO_STATIC_DUTY_SHIFT 0
+#define CG_FDO_CTRL1 0x1d6
+#define FMAX_DUTY100(x) ((x) << 0)
+#define FMAX_DUTY100_MASK 0x000000FF
+#define FMAX_DUTY100_SHIFT 0
+#define CG_FDO_CTRL2 0x1d7
+#define TMIN(x) ((x) << 0)
+#define TMIN_MASK 0x000000FF
+#define TMIN_SHIFT 0
+#define FDO_PWM_MODE(x) ((x) << 11)
+#define FDO_PWM_MODE_MASK (7 << 11)
+#define FDO_PWM_MODE_SHIFT 11
+#define TACH_PWM_RESP_RATE(x) ((x) << 25)
+#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
+#define TACH_PWM_RESP_RATE_SHIFT 25
+
+#define CG_TACH_CTRL 0x1dc
+# define EDGE_PER_REV(x) ((x) << 0)
+# define EDGE_PER_REV_MASK (0x7 << 0)
+# define EDGE_PER_REV_SHIFT 0
+# define TARGET_PERIOD(x) ((x) << 3)
+# define TARGET_PERIOD_MASK 0xfffffff8
+# define TARGET_PERIOD_SHIFT 3
+#define CG_TACH_STATUS 0x1dd
+# define TACH_PERIOD(x) ((x) << 0)
+# define TACH_PERIOD_MASK 0xffffffff
+# define TACH_PERIOD_SHIFT 0
+
+#define GENERAL_PWRMGT 0x1e0
+# define GLOBAL_PWRMGT_EN (1 << 0)
+# define STATIC_PM_EN (1 << 1)
+# define THERMAL_PROTECTION_DIS (1 << 2)
+# define THERMAL_PROTECTION_TYPE (1 << 3)
+# define SW_SMIO_INDEX(x) ((x) << 6)
+# define SW_SMIO_INDEX_MASK (1 << 6)
+# define SW_SMIO_INDEX_SHIFT 6
+# define VOLT_PWRMGT_EN (1 << 10)
+# define DYN_SPREAD_SPECTRUM_EN (1 << 23)
+#define CG_TPC 0x1e1
+#define SCLK_PWRMGT_CNTL 0x1e2
+# define SCLK_PWRMGT_OFF (1 << 0)
+# define SCLK_LOW_D1 (1 << 1)
+# define FIR_RESET (1 << 4)
+# define FIR_FORCE_TREND_SEL (1 << 5)
+# define FIR_TREND_MODE (1 << 6)
+# define DYN_GFX_CLK_OFF_EN (1 << 7)
+# define GFX_CLK_FORCE_ON (1 << 8)
+# define GFX_CLK_REQUEST_OFF (1 << 9)
+# define GFX_CLK_FORCE_OFF (1 << 10)
+# define GFX_CLK_OFF_ACPI_D1 (1 << 11)
+# define GFX_CLK_OFF_ACPI_D2 (1 << 12)
+# define GFX_CLK_OFF_ACPI_D3 (1 << 13)
+# define DYN_LIGHT_SLEEP_EN (1 << 14)
+
+#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6
+# define CURRENT_STATE_INDEX_MASK (0xf << 4)
+# define CURRENT_STATE_INDEX_SHIFT 4
+
+#define CG_FTV 0x1ef
+
+#define CG_FFCT_0 0x1f0
+# define UTC_0(x) ((x) << 0)
+# define UTC_0_MASK (0x3ff << 0)
+# define DTC_0(x) ((x) << 10)
+# define DTC_0_MASK (0x3ff << 10)
+
+#define CG_BSP 0x1ff
+# define BSP(x) ((x) << 0)
+# define BSP_MASK (0xffff << 0)
+# define BSU(x) ((x) << 16)
+# define BSU_MASK (0xf << 16)
+#define CG_AT 0x200
+# define CG_R(x) ((x) << 0)
+# define CG_R_MASK (0xffff << 0)
+# define CG_L(x) ((x) << 16)
+# define CG_L_MASK (0xffff << 16)
+
+#define CG_GIT 0x201
+# define CG_GICST(x) ((x) << 0)
+# define CG_GICST_MASK (0xffff << 0)
+# define CG_GIPOT(x) ((x) << 16)
+# define CG_GIPOT_MASK (0xffff << 16)
+
+#define CG_SSP 0x203
+# define SST(x) ((x) << 0)
+# define SST_MASK (0xffff << 0)
+# define SSTU(x) ((x) << 16)
+# define SSTU_MASK (0xf << 16)
+
+#define CG_DISPLAY_GAP_CNTL 0x20a
+# define DISP1_GAP(x) ((x) << 0)
+# define DISP1_GAP_MASK (3 << 0)
+# define DISP2_GAP(x) ((x) << 2)
+# define DISP2_GAP_MASK (3 << 2)
+# define VBI_TIMER_COUNT(x) ((x) << 4)
+# define VBI_TIMER_COUNT_MASK (0x3fff << 4)
+# define VBI_TIMER_UNIT(x) ((x) << 20)
+# define VBI_TIMER_UNIT_MASK (7 << 20)
+# define DISP1_GAP_MCHG(x) ((x) << 24)
+# define DISP1_GAP_MCHG_MASK (3 << 24)
+# define DISP2_GAP_MCHG(x) ((x) << 26)
+# define DISP2_GAP_MCHG_MASK (3 << 26)
+
+#define CG_ULV_CONTROL 0x21e
+#define CG_ULV_PARAMETER 0x21f
+
+#define SMC_SCRATCH0 0x221
+
+#define CG_CAC_CTRL 0x22e
+# define CAC_WINDOW(x) ((x) << 0)
+# define CAC_WINDOW_MASK 0x00ffffff
+
+#define DMIF_ADDR_CONFIG 0x2F5
+
+#define DMIF_ADDR_CALC 0x300
+
+#define PIPE0_DMIF_BUFFER_CONTROL 0x0328
+# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0)
+# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4)
+
+#define SRBM_STATUS 0x394
+#define GRBM_RQ_PENDING (1 << 5)
+#define VMC_BUSY (1 << 8)
+#define MCB_BUSY (1 << 9)
+#define MCB_NON_DISPLAY_BUSY (1 << 10)
+#define MCC_BUSY (1 << 11)
+#define MCD_BUSY (1 << 12)
+#define SEM_BUSY (1 << 14)
+#define IH_BUSY (1 << 17)
+
+#define SRBM_SOFT_RESET 0x398
+#define SOFT_RESET_BIF (1 << 1)
+#define SOFT_RESET_DC (1 << 5)
+#define SOFT_RESET_DMA1 (1 << 6)
+#define SOFT_RESET_GRBM (1 << 8)
+#define SOFT_RESET_HDP (1 << 9)
+#define SOFT_RESET_IH (1 << 10)
+#define SOFT_RESET_MC (1 << 11)
+#define SOFT_RESET_ROM (1 << 14)
+#define SOFT_RESET_SEM (1 << 15)
+#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
+#define SOFT_RESET_TST (1 << 21)
+#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_ORB (1 << 23)
+
+#define CC_SYS_RB_BACKEND_DISABLE 0x3A0
+#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1
+
+#define SRBM_READ_ERROR 0x3A6
+#define SRBM_INT_CNTL 0x3A8
+#define SRBM_INT_ACK 0x3AA
+
+#define SRBM_STATUS2 0x3B1
+#define DMA_BUSY (1 << 5)
+#define DMA1_BUSY (1 << 6)
+
+#define VM_L2_CNTL 0x500
+#define ENABLE_L2_CACHE (1 << 0)
+#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
+#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
+#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
+#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
+#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
+#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
+#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
+#define VM_L2_CNTL2 0x501
+#define INVALIDATE_ALL_L1_TLBS (1 << 0)
+#define INVALIDATE_L2_CACHE (1 << 1)
+#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
+#define INVALIDATE_PTE_AND_PDE_CACHES 0
+#define INVALIDATE_ONLY_PTE_CACHES 1
+#define INVALIDATE_ONLY_PDE_CACHES 2
+#define VM_L2_CNTL3 0x502
+#define BANK_SELECT(x) ((x) << 0)
+#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
+#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
+#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
+#define VM_L2_STATUS 0x503
+#define L2_BUSY (1 << 0)
+#define VM_CONTEXT0_CNTL 0x504
+#define ENABLE_CONTEXT (1 << 0)
+#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
+#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
+#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
+#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
+#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
+#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
+#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
+#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
+#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
+#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
+#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24)
+#define VM_CONTEXT1_CNTL 0x505
+#define VM_CONTEXT0_CNTL2 0x50C
+#define VM_CONTEXT1_CNTL2 0x50D
+#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E
+#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F
+#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510
+#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511
+#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512
+#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513
+#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514
+#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515
+
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537
+#define PROTECTIONS_MASK (0xf << 0)
+#define PROTECTIONS_SHIFT 0
+ /* bit 0: range
+ * bit 1: pde0
+ * bit 2: valid
+ * bit 3: read
+ * bit 4: write
+ */
+#define MEMORY_CLIENT_ID_MASK (0xff << 12)
+#define MEMORY_CLIENT_ID_SHIFT 12
+#define MEMORY_CLIENT_RW_MASK (1 << 24)
+#define MEMORY_CLIENT_RW_SHIFT 24
+#define FAULT_VMID_MASK (0xf << 25)
+#define FAULT_VMID_SHIFT 25
+
+#define VM_INVALIDATE_REQUEST 0x51E
+#define VM_INVALIDATE_RESPONSE 0x51F
+
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546
+#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547
+
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F
+#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550
+#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551
+#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552
+#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553
+#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554
+#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555
+#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557
+#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558
+
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F
+#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560
+
+#define VM_L2_CG 0x570
+#define MC_CG_ENABLE (1 << 18)
+#define MC_LS_ENABLE (1 << 19)
+
+#define MC_SHARED_CHMAP 0x801
+#define NOOFCHAN_SHIFT 12
+#define NOOFCHAN_MASK 0x0000f000
+#define MC_SHARED_CHREMAP 0x802
+
+#define MC_VM_FB_LOCATION 0x809
+#define MC_VM_AGP_TOP 0x80A
+#define MC_VM_AGP_BOT 0x80B
+#define MC_VM_AGP_BASE 0x80C
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x80D
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x80E
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x80F
+
+#define MC_VM_MX_L1_TLB_CNTL 0x819
+#define ENABLE_L1_TLB (1 << 0)
+#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
+#define SYSTEM_ACCESS_MODE_PA_ONLY (0 << 3)
+#define SYSTEM_ACCESS_MODE_USE_SYS_MAP (1 << 3)
+#define SYSTEM_ACCESS_MODE_IN_SYS (2 << 3)
+#define SYSTEM_ACCESS_MODE_NOT_IN_SYS (3 << 3)
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5)
+#define ENABLE_ADVANCED_DRIVER_MODEL (1 << 6)
+
+#define MC_SHARED_BLACKOUT_CNTL 0x82B
+
+#define MC_HUB_MISC_HUB_CG 0x82E
+#define MC_HUB_MISC_VM_CG 0x82F
+
+#define MC_HUB_MISC_SIP_CG 0x830
+
+#define MC_XPB_CLK_GAT 0x91E
+
+#define MC_CITF_MISC_RD_CG 0x992
+#define MC_CITF_MISC_WR_CG 0x993
+#define MC_CITF_MISC_VM_CG 0x994
+
+#define MC_ARB_RAMCFG 0x9D8
+#define NOOFBANK_SHIFT 0
+#define NOOFBANK_MASK 0x00000003
+#define NOOFRANK_SHIFT 2
+#define NOOFRANK_MASK 0x00000004
+#define NOOFROWS_SHIFT 3
+#define NOOFROWS_MASK 0x00000038
+#define NOOFCOLS_SHIFT 6
+#define NOOFCOLS_MASK 0x000000C0
+#define CHANSIZE_SHIFT 8
+#define CHANSIZE_MASK 0x00000100
+#define CHANSIZE_OVERRIDE (1 << 11)
+#define NOOFGROUPS_SHIFT 12
+#define NOOFGROUPS_MASK 0x00001000
+
+#define MC_ARB_DRAM_TIMING 0x9DD
+#define MC_ARB_DRAM_TIMING2 0x9DE
+
+#define MC_ARB_BURST_TIME 0xA02
+#define STATE0(x) ((x) << 0)
+#define STATE0_MASK (0x1f << 0)
+#define STATE0_SHIFT 0
+#define STATE1(x) ((x) << 5)
+#define STATE1_MASK (0x1f << 5)
+#define STATE1_SHIFT 5
+#define STATE2(x) ((x) << 10)
+#define STATE2_MASK (0x1f << 10)
+#define STATE2_SHIFT 10
+#define STATE3(x) ((x) << 15)
+#define STATE3_MASK (0x1f << 15)
+#define STATE3_SHIFT 15
+
+#define MC_SEQ_TRAIN_WAKEUP_CNTL 0xA3A
+#define TRAIN_DONE_D0 (1 << 30)
+#define TRAIN_DONE_D1 (1 << 31)
+
+#define MC_SEQ_SUP_CNTL 0xA32
+#define RUN_MASK (1 << 0)
+#define MC_SEQ_SUP_PGM 0xA33
+#define MC_PMG_AUTO_CMD 0xA34
+
+#define MC_IO_PAD_CNTL_D0 0xA74
+#define MEM_FALL_OUT_CMD (1 << 8)
+
+#define MC_SEQ_RAS_TIMING 0xA28
+#define MC_SEQ_CAS_TIMING 0xA29
+#define MC_SEQ_MISC_TIMING 0xA2A
+#define MC_SEQ_MISC_TIMING2 0xA2B
+#define MC_SEQ_PMG_TIMING 0xA2C
+#define MC_SEQ_RD_CTL_D0 0xA2D
+#define MC_SEQ_RD_CTL_D1 0xA2E
+#define MC_SEQ_WR_CTL_D0 0xA2F
+#define MC_SEQ_WR_CTL_D1 0xA30
+
+#define MC_SEQ_MISC0 0xA80
+#define MC_SEQ_MISC0_VEN_ID_SHIFT 8
+#define MC_SEQ_MISC0_VEN_ID_MASK 0x00000f00
+#define MC_SEQ_MISC0_VEN_ID_VALUE 3
+#define MC_SEQ_MISC0_REV_ID_SHIFT 12
+#define MC_SEQ_MISC0_REV_ID_MASK 0x0000f000
+#define MC_SEQ_MISC0_REV_ID_VALUE 1
+#define MC_SEQ_MISC0_GDDR5_SHIFT 28
+#define MC_SEQ_MISC0_GDDR5_MASK 0xf0000000
+#define MC_SEQ_MISC0_GDDR5_VALUE 5
+#define MC_SEQ_MISC1 0xA81
+#define MC_SEQ_RESERVE_M 0xA82
+#define MC_PMG_CMD_EMRS 0xA83
+
+#define MC_SEQ_IO_DEBUG_INDEX 0xA91
+#define MC_SEQ_IO_DEBUG_DATA 0xA92
+
+#define MC_SEQ_MISC5 0xA95
+#define MC_SEQ_MISC6 0xA96
+
+#define MC_SEQ_MISC7 0xA99
+
+#define MC_SEQ_RAS_TIMING_LP 0xA9B
+#define MC_SEQ_CAS_TIMING_LP 0xA9C
+#define MC_SEQ_MISC_TIMING_LP 0xA9D
+#define MC_SEQ_MISC_TIMING2_LP 0xA9E
+#define MC_SEQ_WR_CTL_D0_LP 0xA9F
+#define MC_SEQ_WR_CTL_D1_LP 0xAA0
+#define MC_SEQ_PMG_CMD_EMRS_LP 0xAA1
+#define MC_SEQ_PMG_CMD_MRS_LP 0xAA2
+
+#define MC_PMG_CMD_MRS 0xAAB
+
+#define MC_SEQ_RD_CTL_D0_LP 0xAC7
+#define MC_SEQ_RD_CTL_D1_LP 0xAC8
+
+#define MC_PMG_CMD_MRS1 0xAD1
+#define MC_SEQ_PMG_CMD_MRS1_LP 0xAD2
+#define MC_SEQ_PMG_TIMING_LP 0xAD3
+
+#define MC_SEQ_WR_CTL_2 0xAD5
+#define MC_SEQ_WR_CTL_2_LP 0xAD6
+#define MC_PMG_CMD_MRS2 0xAD7
+#define MC_SEQ_PMG_CMD_MRS2_LP 0xAD8
+
+#define MCLK_PWRMGT_CNTL 0xAE8
+# define DLL_SPEED(x) ((x) << 0)
+# define DLL_SPEED_MASK (0x1f << 0)
+# define DLL_READY (1 << 6)
+# define MC_INT_CNTL (1 << 7)
+# define MRDCK0_PDNB (1 << 8)
+# define MRDCK1_PDNB (1 << 9)
+# define MRDCK0_RESET (1 << 16)
+# define MRDCK1_RESET (1 << 17)
+# define DLL_READY_READ (1 << 24)
+#define DLL_CNTL 0xAE9
+# define MRDCK0_BYPASS (1 << 24)
+# define MRDCK1_BYPASS (1 << 25)
+
+#define MPLL_CNTL_MODE 0xAEC
+# define MPLL_MCLK_SEL (1 << 11)
+#define MPLL_FUNC_CNTL 0xAED
+#define BWCTRL(x) ((x) << 20)
+#define BWCTRL_MASK (0xff << 20)
+#define MPLL_FUNC_CNTL_1 0xAEE
+#define VCO_MODE(x) ((x) << 0)
+#define VCO_MODE_MASK (3 << 0)
+#define CLKFRAC(x) ((x) << 4)
+#define CLKFRAC_MASK (0xfff << 4)
+#define CLKF(x) ((x) << 16)
+#define CLKF_MASK (0xfff << 16)
+#define MPLL_FUNC_CNTL_2 0xAEF
+#define MPLL_AD_FUNC_CNTL 0xAF0
+#define YCLK_POST_DIV(x) ((x) << 0)
+#define YCLK_POST_DIV_MASK (7 << 0)
+#define MPLL_DQ_FUNC_CNTL 0xAF1
+#define YCLK_SEL(x) ((x) << 4)
+#define YCLK_SEL_MASK (1 << 4)
+
+#define MPLL_SS1 0xAF3
+#define CLKV(x) ((x) << 0)
+#define CLKV_MASK (0x3ffffff << 0)
+#define MPLL_SS2 0xAF4
+#define CLKS(x) ((x) << 0)
+#define CLKS_MASK (0xfff << 0)
+
+#define HDP_HOST_PATH_CNTL 0xB00
+#define CLOCK_GATING_DIS (1 << 23)
+#define HDP_NONSURFACE_BASE 0xB01
+#define HDP_NONSURFACE_INFO 0xB02
+#define HDP_NONSURFACE_SIZE 0xB03
+
+#define HDP_DEBUG0 0xBCC
+
+#define HDP_ADDR_CONFIG 0xBD2
+#define HDP_MISC_CNTL 0xBD3
+#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
+#define HDP_MEM_POWER_LS 0xBD4
+#define HDP_LS_ENABLE (1 << 0)
+
+#define ATC_MISC_CG 0xCD4
+
+#define IH_RB_CNTL 0xF80
+# define IH_RB_ENABLE (1 << 0)
+# define IH_IB_SIZE(x) ((x) << 1) /* log2 */
+# define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
+# define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
+# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
+# define IH_WPTR_OVERFLOW_ENABLE (1 << 16)
+# define IH_WPTR_OVERFLOW_CLEAR (1 << 31)
+#define IH_RB_BASE 0xF81
+#define IH_RB_RPTR 0xF82
+#define IH_RB_WPTR 0xF83
+# define RB_OVERFLOW (1 << 0)
+# define WPTR_OFFSET_MASK 0x3fffc
+#define IH_RB_WPTR_ADDR_HI 0xF84
+#define IH_RB_WPTR_ADDR_LO 0xF85
+#define IH_CNTL 0xF86
+# define ENABLE_INTR (1 << 0)
+# define IH_MC_SWAP(x) ((x) << 1)
+# define IH_MC_SWAP_NONE 0
+# define IH_MC_SWAP_16BIT 1
+# define IH_MC_SWAP_32BIT 2
+# define IH_MC_SWAP_64BIT 3
+# define RPTR_REARM (1 << 4)
+# define MC_WRREQ_CREDIT(x) ((x) << 15)
+# define MC_WR_CLEAN_CNT(x) ((x) << 20)
+# define MC_VMID(x) ((x) << 25)
+
+#define CONFIG_MEMSIZE 0x150A
+
+#define INTERRUPT_CNTL 0x151A
+# define IH_DUMMY_RD_OVERRIDE (1 << 0)
+# define IH_DUMMY_RD_EN (1 << 1)
+# define IH_REQ_NONSNOOP_EN (1 << 3)
+# define GEN_IH_INT_EN (1 << 8)
+#define INTERRUPT_CNTL2 0x151B
+
+#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520
+
+#define BIF_FB_EN 0x1524
+#define FB_READ_EN (1 << 0)
+#define FB_WRITE_EN (1 << 1)
+
+#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528
+
+/* DCE6 ELD audio interface */
+#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780
+# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0)
+# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8)
+#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781
+
+#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25
+#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0)
+#define SPEAKER_ALLOCATION_MASK (0x7f << 0)
+#define SPEAKER_ALLOCATION_SHIFT 0
+#define HDMI_CONNECTION (1 << 16)
+#define DP_CONNECTION (1 << 17)
+
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */
+# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
+/* max channels minus one. 7 = 8 channels */
+# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
+# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
+# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37
+# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0)
+# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8)
+/* VIDEO_LIPSYNC, AUDIO_LIPSYNC
+ * 0 = invalid
+ * x = legal delay value
+ * 255 = sync not supported
+ */
+#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38
+# define HBR_CAPABLE (1 << 0) /* enabled by default */
+
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a
+# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0)
+# define PRODUCT_ID(x) (((x) & 0xffff) << 16)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b
+# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c
+# define PORT_ID0(x) (((x) & 0xffffffff) << 0)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d
+# define PORT_ID1(x) (((x) & 0xffffffff) << 0)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e
+# define DESCRIPTION0(x) (((x) & 0xff) << 0)
+# define DESCRIPTION1(x) (((x) & 0xff) << 8)
+# define DESCRIPTION2(x) (((x) & 0xff) << 16)
+# define DESCRIPTION3(x) (((x) & 0xff) << 24)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f
+# define DESCRIPTION4(x) (((x) & 0xff) << 0)
+# define DESCRIPTION5(x) (((x) & 0xff) << 8)
+# define DESCRIPTION6(x) (((x) & 0xff) << 16)
+# define DESCRIPTION7(x) (((x) & 0xff) << 24)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40
+# define DESCRIPTION8(x) (((x) & 0xff) << 0)
+# define DESCRIPTION9(x) (((x) & 0xff) << 8)
+# define DESCRIPTION10(x) (((x) & 0xff) << 16)
+# define DESCRIPTION11(x) (((x) & 0xff) << 24)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41
+# define DESCRIPTION12(x) (((x) & 0xff) << 0)
+# define DESCRIPTION13(x) (((x) & 0xff) << 8)
+# define DESCRIPTION14(x) (((x) & 0xff) << 16)
+# define DESCRIPTION15(x) (((x) & 0xff) << 24)
+#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42
+# define DESCRIPTION16(x) (((x) & 0xff) << 0)
+# define DESCRIPTION17(x) (((x) & 0xff) << 8)
+
+#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54
+# define AUDIO_ENABLED (1 << 31)
+
+#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56
+#define PORT_CONNECTIVITY_MASK (3 << 30)
+#define PORT_CONNECTIVITY_SHIFT 30
+
+#define DC_LB_MEMORY_SPLIT 0x1AC3
+#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
+
+#define PRIORITY_A_CNT 0x1AC6
+#define PRIORITY_MARK_MASK 0x7fff
+#define PRIORITY_OFF (1 << 16)
+#define PRIORITY_ALWAYS_ON (1 << 20)
+#define PRIORITY_B_CNT 0x1AC7
+
+#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32
+# define LATENCY_WATERMARK_MASK(x) ((x) << 16)
+#define DPG_PIPE_LATENCY_CONTROL 0x1B33
+# define LATENCY_LOW_WATERMARK(x) ((x) << 0)
+# define LATENCY_HIGH_WATERMARK(x) ((x) << 16)
+
+/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
+#define VLINE_STATUS 0x1AEE
+# define VLINE_OCCURRED (1 << 0)
+# define VLINE_ACK (1 << 4)
+# define VLINE_STAT (1 << 12)
+# define VLINE_INTERRUPT (1 << 16)
+# define VLINE_INTERRUPT_TYPE (1 << 17)
+/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
+#define VBLANK_STATUS 0x1AEF
+# define VBLANK_OCCURRED (1 << 0)
+# define VBLANK_ACK (1 << 4)
+# define VBLANK_STAT (1 << 12)
+# define VBLANK_INTERRUPT (1 << 16)
+# define VBLANK_INTERRUPT_TYPE (1 << 17)
+
+/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
+#define INT_MASK 0x1AD0
+# define VBLANK_INT_MASK (1 << 0)
+# define VLINE_INT_MASK (1 << 4)
+
+#define DISP_INTERRUPT_STATUS 0x183D
+# define LB_D1_VLINE_INTERRUPT (1 << 2)
+# define LB_D1_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD1_INTERRUPT (1 << 17)
+# define DC_HPD1_RX_INTERRUPT (1 << 18)
+# define DACA_AUTODETECT_INTERRUPT (1 << 22)
+# define DACB_AUTODETECT_INTERRUPT (1 << 23)
+# define DC_I2C_SW_DONE_INTERRUPT (1 << 24)
+# define DC_I2C_HW_DONE_INTERRUPT (1 << 25)
+#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E
+# define LB_D2_VLINE_INTERRUPT (1 << 2)
+# define LB_D2_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD2_INTERRUPT (1 << 17)
+# define DC_HPD2_RX_INTERRUPT (1 << 18)
+# define DISP_TIMER_INTERRUPT (1 << 24)
+#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F
+# define LB_D3_VLINE_INTERRUPT (1 << 2)
+# define LB_D3_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD3_INTERRUPT (1 << 17)
+# define DC_HPD3_RX_INTERRUPT (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840
+# define LB_D4_VLINE_INTERRUPT (1 << 2)
+# define LB_D4_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD4_INTERRUPT (1 << 17)
+# define DC_HPD4_RX_INTERRUPT (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853
+# define LB_D5_VLINE_INTERRUPT (1 << 2)
+# define LB_D5_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD5_INTERRUPT (1 << 17)
+# define DC_HPD5_RX_INTERRUPT (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854
+# define LB_D6_VLINE_INTERRUPT (1 << 2)
+# define LB_D6_VBLANK_INTERRUPT (1 << 3)
+# define DC_HPD6_INTERRUPT (1 << 17)
+# define DC_HPD6_RX_INTERRUPT (1 << 18)
+
+/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
+#define GRPH_INT_STATUS 0x1A16
+# define GRPH_PFLIP_INT_OCCURRED (1 << 0)
+# define GRPH_PFLIP_INT_CLEAR (1 << 8)
+/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
+#define GRPH_INT_CONTROL 0x1A17
+# define GRPH_PFLIP_INT_MASK (1 << 0)
+# define GRPH_PFLIP_INT_TYPE (1 << 8)
+
+#define DAC_AUTODETECT_INT_CONTROL 0x19F2
+
+#define DC_HPD1_INT_STATUS 0x1807
+#define DC_HPD2_INT_STATUS 0x180A
+#define DC_HPD3_INT_STATUS 0x180D
+#define DC_HPD4_INT_STATUS 0x1810
+#define DC_HPD5_INT_STATUS 0x1813
+#define DC_HPD6_INT_STATUS 0x1816
+# define DC_HPDx_INT_STATUS (1 << 0)
+# define DC_HPDx_SENSE (1 << 1)
+# define DC_HPDx_RX_INT_STATUS (1 << 8)
+
+#define DC_HPD1_INT_CONTROL 0x1808
+#define DC_HPD2_INT_CONTROL 0x180B
+#define DC_HPD3_INT_CONTROL 0x180E
+#define DC_HPD4_INT_CONTROL 0x1811
+#define DC_HPD5_INT_CONTROL 0x1814
+#define DC_HPD6_INT_CONTROL 0x1817
+# define DC_HPDx_INT_ACK (1 << 0)
+# define DC_HPDx_INT_POLARITY (1 << 8)
+# define DC_HPDx_INT_EN (1 << 16)
+# define DC_HPDx_RX_INT_ACK (1 << 20)
+# define DC_HPDx_RX_INT_EN (1 << 24)
+
+#define DC_HPD1_CONTROL 0x1809
+#define DC_HPD2_CONTROL 0x180C
+#define DC_HPD3_CONTROL 0x180F
+#define DC_HPD4_CONTROL 0x1812
+#define DC_HPD5_CONTROL 0x1815
+#define DC_HPD6_CONTROL 0x1818
+# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0)
+# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
+# define DC_HPDx_EN (1 << 28)
+
+#define DPG_PIPE_STUTTER_CONTROL 0x1B35
+# define STUTTER_ENABLE (1 << 0)
+
+/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
+#define CRTC_STATUS_FRAME_COUNT 0x1BA6
+
+/* Audio clocks */
+#define DCCG_AUDIO_DTO_SOURCE 0x05ac
+# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */
+# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */
+
+#define DCCG_AUDIO_DTO0_PHASE 0x05b0
+#define DCCG_AUDIO_DTO0_MODULE 0x05b4
+#define DCCG_AUDIO_DTO1_PHASE 0x05c0
+#define DCCG_AUDIO_DTO1_MODULE 0x05c4
+
+#define AFMT_AUDIO_SRC_CONTROL 0x1c4f
+#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0)
+/* AFMT_AUDIO_SRC_SELECT
+ * 0 = stream0
+ * 1 = stream1
+ * 2 = stream2
+ * 3 = stream3
+ * 4 = stream4
+ * 5 = stream5
+ */
+
+#define GRBM_CNTL 0x2000
+#define GRBM_READ_TIMEOUT(x) ((x) << 0)
+
+#define GRBM_STATUS2 0x2002
+#define RLC_RQ_PENDING (1 << 0)
+#define RLC_BUSY (1 << 8)
+#define TC_BUSY (1 << 9)
+
+#define GRBM_STATUS 0x2004
+#define CMDFIFO_AVAIL_MASK 0x0000000F
+#define RING2_RQ_PENDING (1 << 4)
+#define SRBM_RQ_PENDING (1 << 5)
+#define RING1_RQ_PENDING (1 << 6)
+#define CF_RQ_PENDING (1 << 7)
+#define PF_RQ_PENDING (1 << 8)
+#define GDS_DMA_RQ_PENDING (1 << 9)
+#define GRBM_EE_BUSY (1 << 10)
+#define DB_CLEAN (1 << 12)
+#define CB_CLEAN (1 << 13)
+#define TA_BUSY (1 << 14)
+#define GDS_BUSY (1 << 15)
+#define VGT_BUSY (1 << 17)
+#define IA_BUSY_NO_DMA (1 << 18)
+#define IA_BUSY (1 << 19)
+#define SX_BUSY (1 << 20)
+#define SPI_BUSY (1 << 22)
+#define BCI_BUSY (1 << 23)
+#define SC_BUSY (1 << 24)
+#define PA_BUSY (1 << 25)
+#define DB_BUSY (1 << 26)
+#define CP_COHERENCY_BUSY (1 << 28)
+#define CP_BUSY (1 << 29)
+#define CB_BUSY (1 << 30)
+#define GUI_ACTIVE (1 << 31)
+#define GRBM_STATUS_SE0 0x2005
+#define GRBM_STATUS_SE1 0x2006
+#define SE_DB_CLEAN (1 << 1)
+#define SE_CB_CLEAN (1 << 2)
+#define SE_BCI_BUSY (1 << 22)
+#define SE_VGT_BUSY (1 << 23)
+#define SE_PA_BUSY (1 << 24)
+#define SE_TA_BUSY (1 << 25)
+#define SE_SX_BUSY (1 << 26)
+#define SE_SPI_BUSY (1 << 27)
+#define SE_SC_BUSY (1 << 29)
+#define SE_DB_BUSY (1 << 30)
+#define SE_CB_BUSY (1 << 31)
+
+#define GRBM_SOFT_RESET 0x2008
+#define SOFT_RESET_CP (1 << 0)
+#define SOFT_RESET_CB (1 << 1)
+#define SOFT_RESET_RLC (1 << 2)
+#define SOFT_RESET_DB (1 << 3)
+#define SOFT_RESET_GDS (1 << 4)
+#define SOFT_RESET_PA (1 << 5)
+#define SOFT_RESET_SC (1 << 6)
+#define SOFT_RESET_BCI (1 << 7)
+#define SOFT_RESET_SPI (1 << 8)
+#define SOFT_RESET_SX (1 << 10)
+#define SOFT_RESET_TC (1 << 11)
+#define SOFT_RESET_TA (1 << 12)
+#define SOFT_RESET_VGT (1 << 14)
+#define SOFT_RESET_IA (1 << 15)
+
+#define GRBM_GFX_INDEX 0x200B
+#define INSTANCE_INDEX(x) ((x) << 0)
+#define SH_INDEX(x) ((x) << 8)
+#define SE_INDEX(x) ((x) << 16)
+#define SH_BROADCAST_WRITES (1 << 29)
+#define INSTANCE_BROADCAST_WRITES (1 << 30)
+#define SE_BROADCAST_WRITES (1 << 31)
+
+#define GRBM_INT_CNTL 0x2018
+# define RDERR_INT_ENABLE (1 << 0)
+# define GUI_IDLE_INT_ENABLE (1 << 19)
+
+#define CP_STRMOUT_CNTL 0x213F
+#define SCRATCH_REG0 0x2140
+#define SCRATCH_REG1 0x2141
+#define SCRATCH_REG2 0x2142
+#define SCRATCH_REG3 0x2143
+#define SCRATCH_REG4 0x2144
+#define SCRATCH_REG5 0x2145
+#define SCRATCH_REG6 0x2146
+#define SCRATCH_REG7 0x2147
+
+#define SCRATCH_UMSK 0x2150
+#define SCRATCH_ADDR 0x2151
+
+#define CP_SEM_WAIT_TIMER 0x216F
+
+#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172
+
+#define CP_ME_CNTL 0x21B6
+#define CP_CE_HALT (1 << 24)
+#define CP_PFP_HALT (1 << 26)
+#define CP_ME_HALT (1 << 28)
+
+#define CP_COHER_CNTL2 0x217A
+
+#define CP_RB2_RPTR 0x21BE
+#define CP_RB1_RPTR 0x21BF
+#define CP_RB0_RPTR 0x21C0
+#define CP_RB_WPTR_DELAY 0x21C1
+
+#define CP_QUEUE_THRESHOLDS 0x21D8
+#define ROQ_IB1_START(x) ((x) << 0)
+#define ROQ_IB2_START(x) ((x) << 8)
+#define CP_MEQ_THRESHOLDS 0x21D9
+#define MEQ1_START(x) ((x) << 0)
+#define MEQ2_START(x) ((x) << 8)
+
+#define CP_PERFMON_CNTL 0x21FF
+
+#define VGT_VTX_VECT_EJECT_REG 0x222C
+
+#define VGT_CACHE_INVALIDATION 0x2231
+#define CACHE_INVALIDATION(x) ((x) << 0)
+#define VC_ONLY 0
+#define TC_ONLY 1
+#define VC_AND_TC 2
+#define AUTO_INVLD_EN(x) ((x) << 6)
+#define NO_AUTO 0
+#define ES_AUTO 1
+#define GS_AUTO 2
+#define ES_AND_GS_AUTO 3
+#define VGT_ESGS_RING_SIZE 0x2232
+#define VGT_GSVS_RING_SIZE 0x2233
+
+#define VGT_GS_VERTEX_REUSE 0x2235
+
+#define VGT_PRIMITIVE_TYPE 0x2256
+#define VGT_INDEX_TYPE 0x2257
+
+#define VGT_NUM_INDICES 0x225C
+#define VGT_NUM_INSTANCES 0x225D
+
+#define VGT_TF_RING_SIZE 0x2262
+
+#define VGT_HS_OFFCHIP_PARAM 0x226C
+
+#define VGT_TF_MEMORY_BASE 0x226E
+
+#define CC_GC_SHADER_ARRAY_CONFIG 0x226F
+#define INACTIVE_CUS_MASK 0xFFFF0000
+#define INACTIVE_CUS_SHIFT 16
+#define GC_USER_SHADER_ARRAY_CONFIG 0x2270
+
+#define PA_CL_ENHANCE 0x2285
+#define CLIP_VTX_REORDER_ENA (1 << 0)
+#define NUM_CLIP_SEQ(x) ((x) << 1)
+
+#define PA_SU_LINE_STIPPLE_VALUE 0x2298
+
+#define PA_SC_LINE_STIPPLE_STATE 0x22C4
+
+#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9
+#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
+#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16)
+
+#define PA_SC_FIFO_SIZE 0x22F3
+#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0)
+#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6)
+#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15)
+#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23)
+
+#define PA_SC_ENHANCE 0x22FC
+
+#define SQ_CONFIG 0x2300
+
+#define SQC_CACHES 0x2302
+
+#define SQ_POWER_THROTTLE 0x2396
+#define MIN_POWER(x) ((x) << 0)
+#define MIN_POWER_MASK (0x3fff << 0)
+#define MIN_POWER_SHIFT 0
+#define MAX_POWER(x) ((x) << 16)
+#define MAX_POWER_MASK (0x3fff << 16)
+#define MAX_POWER_SHIFT 0
+#define SQ_POWER_THROTTLE2 0x2397
+#define MAX_POWER_DELTA(x) ((x) << 0)
+#define MAX_POWER_DELTA_MASK (0x3fff << 0)
+#define MAX_POWER_DELTA_SHIFT 0
+#define STI_SIZE(x) ((x) << 16)
+#define STI_SIZE_MASK (0x3ff << 16)
+#define STI_SIZE_SHIFT 16
+#define LTI_RATIO(x) ((x) << 27)
+#define LTI_RATIO_MASK (0xf << 27)
+#define LTI_RATIO_SHIFT 27
+
+#define SX_DEBUG_1 0x2418
+
+#define SPI_STATIC_THREAD_MGMT_1 0x2438
+#define SPI_STATIC_THREAD_MGMT_2 0x2439
+#define SPI_STATIC_THREAD_MGMT_3 0x243A
+#define SPI_PS_MAX_WAVE_ID 0x243B
+
+#define SPI_CONFIG_CNTL 0x2440
+
+#define SPI_CONFIG_CNTL_1 0x244F
+#define VTX_DONE_DELAY(x) ((x) << 0)
+#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
+
+#define CGTS_TCC_DISABLE 0x2452
+#define CGTS_USER_TCC_DISABLE 0x2453
+#define TCC_DISABLE_MASK 0xFFFF0000
+#define TCC_DISABLE_SHIFT 16
+#define CGTS_SM_CTRL_REG 0x2454
+#define OVERRIDE (1 << 21)
+#define LS_OVERRIDE (1 << 22)
+
+#define SPI_LB_CU_MASK 0x24D5
+
+#define TA_CNTL_AUX 0x2542
+
+#define CC_RB_BACKEND_DISABLE 0x263D
+#define BACKEND_DISABLE(x) ((x) << 16)
+#define GB_ADDR_CONFIG 0x263E
+#define NUM_PIPES(x) ((x) << 0)
+#define NUM_PIPES_MASK 0x00000007
+#define NUM_PIPES_SHIFT 0
+#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4)
+#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070
+#define PIPE_INTERLEAVE_SIZE_SHIFT 4
+#define NUM_SHADER_ENGINES(x) ((x) << 12)
+#define NUM_SHADER_ENGINES_MASK 0x00003000
+#define NUM_SHADER_ENGINES_SHIFT 12
+#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16)
+#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000
+#define SHADER_ENGINE_TILE_SIZE_SHIFT 16
+#define NUM_GPUS(x) ((x) << 20)
+#define NUM_GPUS_MASK 0x00700000
+#define NUM_GPUS_SHIFT 20
+#define MULTI_GPU_TILE_SIZE(x) ((x) << 24)
+#define MULTI_GPU_TILE_SIZE_MASK 0x03000000
+#define MULTI_GPU_TILE_SIZE_SHIFT 24
+#define ROW_SIZE(x) ((x) << 28)
+#define ROW_SIZE_MASK 0x30000000
+#define ROW_SIZE_SHIFT 28
+
+#define GB_TILE_MODE0 0x2644
+# define MICRO_TILE_MODE(x) ((x) << 0)
+# define ADDR_SURF_DISPLAY_MICRO_TILING 0
+# define ADDR_SURF_THIN_MICRO_TILING 1
+# define ADDR_SURF_DEPTH_MICRO_TILING 2
+# define ARRAY_MODE(x) ((x) << 2)
+# define ARRAY_LINEAR_GENERAL 0
+# define ARRAY_LINEAR_ALIGNED 1
+# define ARRAY_1D_TILED_THIN1 2
+# define ARRAY_2D_TILED_THIN1 4
+# define PIPE_CONFIG(x) ((x) << 6)
+# define ADDR_SURF_P2 0
+# define ADDR_SURF_P4_8x16 4
+# define ADDR_SURF_P4_16x16 5
+# define ADDR_SURF_P4_16x32 6
+# define ADDR_SURF_P4_32x32 7
+# define ADDR_SURF_P8_16x16_8x16 8
+# define ADDR_SURF_P8_16x32_8x16 9
+# define ADDR_SURF_P8_32x32_8x16 10
+# define ADDR_SURF_P8_16x32_16x16 11
+# define ADDR_SURF_P8_32x32_16x16 12
+# define ADDR_SURF_P8_32x32_16x32 13
+# define ADDR_SURF_P8_32x64_32x32 14
+# define TILE_SPLIT(x) ((x) << 11)
+# define ADDR_SURF_TILE_SPLIT_64B 0
+# define ADDR_SURF_TILE_SPLIT_128B 1
+# define ADDR_SURF_TILE_SPLIT_256B 2
+# define ADDR_SURF_TILE_SPLIT_512B 3
+# define ADDR_SURF_TILE_SPLIT_1KB 4
+# define ADDR_SURF_TILE_SPLIT_2KB 5
+# define ADDR_SURF_TILE_SPLIT_4KB 6
+# define BANK_WIDTH(x) ((x) << 14)
+# define ADDR_SURF_BANK_WIDTH_1 0
+# define ADDR_SURF_BANK_WIDTH_2 1
+# define ADDR_SURF_BANK_WIDTH_4 2
+# define ADDR_SURF_BANK_WIDTH_8 3
+# define BANK_HEIGHT(x) ((x) << 16)
+# define ADDR_SURF_BANK_HEIGHT_1 0
+# define ADDR_SURF_BANK_HEIGHT_2 1
+# define ADDR_SURF_BANK_HEIGHT_4 2
+# define ADDR_SURF_BANK_HEIGHT_8 3
+# define MACRO_TILE_ASPECT(x) ((x) << 18)
+# define ADDR_SURF_MACRO_ASPECT_1 0
+# define ADDR_SURF_MACRO_ASPECT_2 1
+# define ADDR_SURF_MACRO_ASPECT_4 2
+# define ADDR_SURF_MACRO_ASPECT_8 3
+# define NUM_BANKS(x) ((x) << 20)
+# define ADDR_SURF_2_BANK 0
+# define ADDR_SURF_4_BANK 1
+# define ADDR_SURF_8_BANK 2
+# define ADDR_SURF_16_BANK 3
+#define GB_TILE_MODE1 0x2645
+#define GB_TILE_MODE2 0x2646
+#define GB_TILE_MODE3 0x2647
+#define GB_TILE_MODE4 0x2648
+#define GB_TILE_MODE5 0x2649
+#define GB_TILE_MODE6 0x264a
+#define GB_TILE_MODE7 0x264b
+#define GB_TILE_MODE8 0x264c
+#define GB_TILE_MODE9 0x264d
+#define GB_TILE_MODE10 0x264e
+#define GB_TILE_MODE11 0x264f
+#define GB_TILE_MODE12 0x2650
+#define GB_TILE_MODE13 0x2651
+#define GB_TILE_MODE14 0x2652
+#define GB_TILE_MODE15 0x2653
+#define GB_TILE_MODE16 0x2654
+#define GB_TILE_MODE17 0x2655
+#define GB_TILE_MODE18 0x2656
+#define GB_TILE_MODE19 0x2657
+#define GB_TILE_MODE20 0x2658
+#define GB_TILE_MODE21 0x2659
+#define GB_TILE_MODE22 0x265a
+#define GB_TILE_MODE23 0x265b
+#define GB_TILE_MODE24 0x265c
+#define GB_TILE_MODE25 0x265d
+#define GB_TILE_MODE26 0x265e
+#define GB_TILE_MODE27 0x265f
+#define GB_TILE_MODE28 0x2660
+#define GB_TILE_MODE29 0x2661
+#define GB_TILE_MODE30 0x2662
+#define GB_TILE_MODE31 0x2663
+
+#define CB_PERFCOUNTER0_SELECT0 0x2688
+#define CB_PERFCOUNTER0_SELECT1 0x2689
+#define CB_PERFCOUNTER1_SELECT0 0x268A
+#define CB_PERFCOUNTER1_SELECT1 0x268B
+#define CB_PERFCOUNTER2_SELECT0 0x268C
+#define CB_PERFCOUNTER2_SELECT1 0x268D
+#define CB_PERFCOUNTER3_SELECT0 0x268E
+#define CB_PERFCOUNTER3_SELECT1 0x268F
+
+#define CB_CGTT_SCLK_CTRL 0x2698
+
+#define GC_USER_RB_BACKEND_DISABLE 0x26DF
+#define BACKEND_DISABLE_MASK 0x00FF0000
+#define BACKEND_DISABLE_SHIFT 16
+
+#define TCP_CHAN_STEER_LO 0x2B03
+#define TCP_CHAN_STEER_HI 0x2B94
+
+#define CP_RB0_BASE 0x3040
+#define CP_RB0_CNTL 0x3041
+#define RB_BUFSZ(x) ((x) << 0)
+#define RB_BLKSZ(x) ((x) << 8)
+#define BUF_SWAP_32BIT (2 << 16)
+#define RB_NO_UPDATE (1 << 27)
+#define RB_RPTR_WR_ENA (1 << 31)
+
+#define CP_RB0_RPTR_ADDR 0x3043
+#define CP_RB0_RPTR_ADDR_HI 0x3044
+#define CP_RB0_WPTR 0x3045
+
+#define CP_PFP_UCODE_ADDR 0x3054
+#define CP_PFP_UCODE_DATA 0x3055
+#define CP_ME_RAM_RADDR 0x3056
+#define CP_ME_RAM_WADDR 0x3057
+#define CP_ME_RAM_DATA 0x3058
+
+#define CP_CE_UCODE_ADDR 0x305A
+#define CP_CE_UCODE_DATA 0x305B
+
+#define CP_RB1_BASE 0x3060
+#define CP_RB1_CNTL 0x3061
+#define CP_RB1_RPTR_ADDR 0x3062
+#define CP_RB1_RPTR_ADDR_HI 0x3063
+#define CP_RB1_WPTR 0x3064
+#define CP_RB2_BASE 0x3065
+#define CP_RB2_CNTL 0x3066
+#define CP_RB2_RPTR_ADDR 0x3067
+#define CP_RB2_RPTR_ADDR_HI 0x3068
+#define CP_RB2_WPTR 0x3069
+#define CP_INT_CNTL_RING0 0x306A
+#define CP_INT_CNTL_RING1 0x306B
+#define CP_INT_CNTL_RING2 0x306C
+# define CNTX_BUSY_INT_ENABLE (1 << 19)
+# define CNTX_EMPTY_INT_ENABLE (1 << 20)
+# define WAIT_MEM_SEM_INT_ENABLE (1 << 21)
+# define TIME_STAMP_INT_ENABLE (1 << 26)
+# define CP_RINGID2_INT_ENABLE (1 << 29)
+# define CP_RINGID1_INT_ENABLE (1 << 30)
+# define CP_RINGID0_INT_ENABLE (1 << 31)
+#define CP_INT_STATUS_RING0 0x306D
+#define CP_INT_STATUS_RING1 0x306E
+#define CP_INT_STATUS_RING2 0x306F
+# define WAIT_MEM_SEM_INT_STAT (1 << 21)
+# define TIME_STAMP_INT_STAT (1 << 26)
+# define CP_RINGID2_INT_STAT (1 << 29)
+# define CP_RINGID1_INT_STAT (1 << 30)
+# define CP_RINGID0_INT_STAT (1 << 31)
+
+#define CP_MEM_SLP_CNTL 0x3079
+# define CP_MEM_LS_EN (1 << 0)
+
+#define CP_DEBUG 0x307F
+
+#define RLC_CNTL 0x30C0
+# define RLC_ENABLE (1 << 0)
+#define RLC_RL_BASE 0x30C1
+#define RLC_RL_SIZE 0x30C2
+#define RLC_LB_CNTL 0x30C3
+# define LOAD_BALANCE_ENABLE (1 << 0)
+#define RLC_SAVE_AND_RESTORE_BASE 0x30C4
+#define RLC_LB_CNTR_MAX 0x30C5
+#define RLC_LB_CNTR_INIT 0x30C6
+
+#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8
+
+#define RLC_UCODE_ADDR 0x30CB
+#define RLC_UCODE_DATA 0x30CC
+
+#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE
+#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF
+#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0
+#define RLC_MC_CNTL 0x30D1
+#define RLC_UCODE_CNTL 0x30D2
+#define RLC_STAT 0x30D3
+# define RLC_BUSY_STATUS (1 << 0)
+# define GFX_POWER_STATUS (1 << 1)
+# define GFX_CLOCK_STATUS (1 << 2)
+# define GFX_LS_STATUS (1 << 3)
+
+#define RLC_PG_CNTL 0x30D7
+# define GFX_PG_ENABLE (1 << 0)
+# define GFX_PG_SRC (1 << 1)
+
+#define RLC_CGTT_MGCG_OVERRIDE 0x3100
+#define RLC_CGCG_CGLS_CTRL 0x3101
+# define CGCG_EN (1 << 0)
+# define CGLS_EN (1 << 1)
+
+#define RLC_TTOP_D 0x3105
+# define RLC_PUD(x) ((x) << 0)
+# define RLC_PUD_MASK (0xff << 0)
+# define RLC_PDD(x) ((x) << 8)
+# define RLC_PDD_MASK (0xff << 8)
+# define RLC_TTPD(x) ((x) << 16)
+# define RLC_TTPD_MASK (0xff << 16)
+# define RLC_MSD(x) ((x) << 24)
+# define RLC_MSD_MASK (0xff << 24)
+
+#define RLC_LB_INIT_CU_MASK 0x3107
+
+#define RLC_PG_AO_CU_MASK 0x310B
+#define RLC_MAX_PG_CU 0x310C
+# define MAX_PU_CU(x) ((x) << 0)
+# define MAX_PU_CU_MASK (0xff << 0)
+#define RLC_AUTO_PG_CTRL 0x310C
+# define AUTO_PG_EN (1 << 0)
+# define GRBM_REG_SGIT(x) ((x) << 3)
+# define GRBM_REG_SGIT_MASK (0xffff << 3)
+# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19)
+# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19)
+
+#define RLC_SERDES_WR_MASTER_MASK_0 0x3115
+#define RLC_SERDES_WR_MASTER_MASK_1 0x3116
+#define RLC_SERDES_WR_CTRL 0x3117
+
+#define RLC_SERDES_MASTER_BUSY_0 0x3119
+#define RLC_SERDES_MASTER_BUSY_1 0x311A
+
+#define RLC_GCPM_GENERAL_3 0x311E
+
+#define DB_RENDER_CONTROL 0xA000
+
+#define DB_DEPTH_INFO 0xA00F
+
+#define PA_SC_RASTER_CONFIG 0xA0D4
+# define RB_MAP_PKR0(x) ((x) << 0)
+# define RB_MAP_PKR0_MASK (0x3 << 0)
+# define RB_MAP_PKR1(x) ((x) << 2)
+# define RB_MAP_PKR1_MASK (0x3 << 2)
+# define RASTER_CONFIG_RB_MAP_0 0
+# define RASTER_CONFIG_RB_MAP_1 1
+# define RASTER_CONFIG_RB_MAP_2 2
+# define RASTER_CONFIG_RB_MAP_3 3
+# define RB_XSEL2(x) ((x) << 4)
+# define RB_XSEL2_MASK (0x3 << 4)
+# define RB_XSEL (1 << 6)
+# define RB_YSEL (1 << 7)
+# define PKR_MAP(x) ((x) << 8)
+# define PKR_MAP_MASK (0x3 << 8)
+# define RASTER_CONFIG_PKR_MAP_0 0
+# define RASTER_CONFIG_PKR_MAP_1 1
+# define RASTER_CONFIG_PKR_MAP_2 2
+# define RASTER_CONFIG_PKR_MAP_3 3
+# define PKR_XSEL(x) ((x) << 10)
+# define PKR_XSEL_MASK (0x3 << 10)
+# define PKR_YSEL(x) ((x) << 12)
+# define PKR_YSEL_MASK (0x3 << 12)
+# define SC_MAP(x) ((x) << 16)
+# define SC_MAP_MASK (0x3 << 16)
+# define SC_XSEL(x) ((x) << 18)
+# define SC_XSEL_MASK (0x3 << 18)
+# define SC_YSEL(x) ((x) << 20)
+# define SC_YSEL_MASK (0x3 << 20)
+# define SE_MAP(x) ((x) << 24)
+# define SE_MAP_MASK (0x3 << 24)
+# define RASTER_CONFIG_SE_MAP_0 0
+# define RASTER_CONFIG_SE_MAP_1 1
+# define RASTER_CONFIG_SE_MAP_2 2
+# define RASTER_CONFIG_SE_MAP_3 3
+# define SE_XSEL(x) ((x) << 26)
+# define SE_XSEL_MASK (0x3 << 26)
+# define SE_YSEL(x) ((x) << 28)
+# define SE_YSEL_MASK (0x3 << 28)
+
+
+#define VGT_EVENT_INITIATOR 0xA2A4
+# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
+# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
+# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
+# define CACHE_FLUSH_TS (4 << 0)
+# define CACHE_FLUSH (6 << 0)
+# define CS_PARTIAL_FLUSH (7 << 0)
+# define VGT_STREAMOUT_RESET (10 << 0)
+# define END_OF_PIPE_INCR_DE (11 << 0)
+# define END_OF_PIPE_IB_END (12 << 0)
+# define RST_PIX_CNT (13 << 0)
+# define VS_PARTIAL_FLUSH (15 << 0)
+# define PS_PARTIAL_FLUSH (16 << 0)
+# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
+# define ZPASS_DONE (21 << 0)
+# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
+# define PERFCOUNTER_START (23 << 0)
+# define PERFCOUNTER_STOP (24 << 0)
+# define PIPELINESTAT_START (25 << 0)
+# define PIPELINESTAT_STOP (26 << 0)
+# define PERFCOUNTER_SAMPLE (27 << 0)
+# define SAMPLE_PIPELINESTAT (30 << 0)
+# define SAMPLE_STREAMOUTSTATS (32 << 0)
+# define RESET_VTX_CNT (33 << 0)
+# define VGT_FLUSH (36 << 0)
+# define BOTTOM_OF_PIPE_TS (40 << 0)
+# define DB_CACHE_FLUSH_AND_INV (42 << 0)
+# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
+# define FLUSH_AND_INV_DB_META (44 << 0)
+# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
+# define FLUSH_AND_INV_CB_META (46 << 0)
+# define CS_DONE (47 << 0)
+# define PS_DONE (48 << 0)
+# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
+# define THREAD_TRACE_START (51 << 0)
+# define THREAD_TRACE_STOP (52 << 0)
+# define THREAD_TRACE_FLUSH (54 << 0)
+# define THREAD_TRACE_FINISH (55 << 0)
+
+/* PIF PHY0 registers idx/data 0x8/0xc */
+#define PB0_PIF_CNTL 0x10
+# define LS2_EXIT_TIME(x) ((x) << 17)
+# define LS2_EXIT_TIME_MASK (0x7 << 17)
+# define LS2_EXIT_TIME_SHIFT 17
+#define PB0_PIF_PAIRING 0x11
+# define MULTI_PIF (1 << 25)
+#define PB0_PIF_PWRDOWN_0 0x12
+# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7)
+# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7)
+# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7
+# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10)
+# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10)
+# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10
+# define PLL_RAMP_UP_TIME_0(x) ((x) << 24)
+# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24)
+# define PLL_RAMP_UP_TIME_0_SHIFT 24
+#define PB0_PIF_PWRDOWN_1 0x13
+# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7)
+# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7)
+# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7
+# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10)
+# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10)
+# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10
+# define PLL_RAMP_UP_TIME_1(x) ((x) << 24)
+# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24)
+# define PLL_RAMP_UP_TIME_1_SHIFT 24
+
+#define PB0_PIF_PWRDOWN_2 0x17
+# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7)
+# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7)
+# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7
+# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10)
+# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10)
+# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10
+# define PLL_RAMP_UP_TIME_2(x) ((x) << 24)
+# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24)
+# define PLL_RAMP_UP_TIME_2_SHIFT 24
+#define PB0_PIF_PWRDOWN_3 0x18
+# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7)
+# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7)
+# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7
+# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10)
+# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10)
+# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10
+# define PLL_RAMP_UP_TIME_3(x) ((x) << 24)
+# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24)
+# define PLL_RAMP_UP_TIME_3_SHIFT 24
+/* PIF PHY1 registers idx/data 0x10/0x14 */
+#define PB1_PIF_CNTL 0x10
+#define PB1_PIF_PAIRING 0x11
+#define PB1_PIF_PWRDOWN_0 0x12
+#define PB1_PIF_PWRDOWN_1 0x13
+
+#define PB1_PIF_PWRDOWN_2 0x17
+#define PB1_PIF_PWRDOWN_3 0x18
+/* PCIE registers idx/data 0x30/0x34 */
+#define PCIE_CNTL2 0x1c /* PCIE */
+# define SLV_MEM_LS_EN (1 << 16)
+# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17)
+# define MST_MEM_LS_EN (1 << 18)
+# define REPLAY_MEM_LS_EN (1 << 19)
+#define PCIE_LC_STATUS1 0x28 /* PCIE */
+# define LC_REVERSE_RCVR (1 << 0)
+# define LC_REVERSE_XMIT (1 << 1)
+# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2)
+# define LC_OPERATING_LINK_WIDTH_SHIFT 2
+# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5)
+# define LC_DETECTED_LINK_WIDTH_SHIFT 5
+
+#define PCIE_P_CNTL 0x40 /* PCIE */
+# define P_IGNORE_EDB_ERR (1 << 6)
+
+/* PCIE PORT registers idx/data 0x38/0x3c */
+#define PCIE_LC_CNTL 0xa0
+# define LC_L0S_INACTIVITY(x) ((x) << 8)
+# define LC_L0S_INACTIVITY_MASK (0xf << 8)
+# define LC_L0S_INACTIVITY_SHIFT 8
+# define LC_L1_INACTIVITY(x) ((x) << 12)
+# define LC_L1_INACTIVITY_MASK (0xf << 12)
+# define LC_L1_INACTIVITY_SHIFT 12
+# define LC_PMI_TO_L1_DIS (1 << 16)
+# define LC_ASPM_TO_L1_DIS (1 << 24)
+#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
+# define LC_LINK_WIDTH_SHIFT 0
+# define LC_LINK_WIDTH_MASK 0x7
+# define LC_LINK_WIDTH_X0 0
+# define LC_LINK_WIDTH_X1 1
+# define LC_LINK_WIDTH_X2 2
+# define LC_LINK_WIDTH_X4 3
+# define LC_LINK_WIDTH_X8 4
+# define LC_LINK_WIDTH_X16 6
+# define LC_LINK_WIDTH_RD_SHIFT 4
+# define LC_LINK_WIDTH_RD_MASK 0x70
+# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7)
+# define LC_RECONFIG_NOW (1 << 8)
+# define LC_RENEGOTIATION_SUPPORT (1 << 9)
+# define LC_RENEGOTIATE_EN (1 << 10)
+# define LC_SHORT_RECONFIG_EN (1 << 11)
+# define LC_UPCONFIGURE_SUPPORT (1 << 12)
+# define LC_UPCONFIGURE_DIS (1 << 13)
+# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21)
+# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21)
+# define LC_DYN_LANES_PWR_STATE_SHIFT 21
+#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */
+# define LC_XMIT_N_FTS(x) ((x) << 0)
+# define LC_XMIT_N_FTS_MASK (0xff << 0)
+# define LC_XMIT_N_FTS_SHIFT 0
+# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8)
+# define LC_N_FTS_MASK (0xff << 24)
+#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */
+# define LC_GEN2_EN_STRAP (1 << 0)
+# define LC_GEN3_EN_STRAP (1 << 1)
+# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2)
+# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3)
+# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3
+# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5)
+# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6)
+# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7)
+# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8)
+# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9)
+# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10)
+# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10
+# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */
+# define LC_CURRENT_DATA_RATE_SHIFT 13
+# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16)
+# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18)
+# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19)
+# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20)
+# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21)
+
+#define PCIE_LC_CNTL2 0xb1
+# define LC_ALLOW_PDWN_IN_L1 (1 << 17)
+# define LC_ALLOW_PDWN_IN_L23 (1 << 18)
+
+#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */
+# define LC_GO_TO_RECOVERY (1 << 30)
+#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */
+# define LC_REDO_EQ (1 << 5)
+# define LC_SET_QUIESCE (1 << 13)
+
+/*
+ * UVD
+ */
+#define UVD_UDEC_ADDR_CONFIG 0x3bd3
+#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4
+#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5
+#define UVD_RBC_RB_RPTR 0x3da4
+#define UVD_RBC_RB_WPTR 0x3da5
+#define UVD_STATUS 0x3daf
+
+#define UVD_CGC_CTRL 0x3dc2
+# define DCM (1 << 0)
+# define CG_DT(x) ((x) << 2)
+# define CG_DT_MASK (0xf << 2)
+# define CLK_OD(x) ((x) << 6)
+# define CLK_OD_MASK (0x1f << 6)
+
+ /* UVD CTX indirect */
+#define UVD_CGC_MEM_CTRL 0xC0
+#define UVD_CGC_CTRL2 0xC1
+# define DYN_OR_EN (1 << 0)
+# define DYN_RR_EN (1 << 1)
+# define G_DIV_ID(x) ((x) << 2)
+# define G_DIV_ID_MASK (0x7 << 2)
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define RADEON_PACKET_TYPE3 3
+#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define GDS_PARTITION_BASE 2
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ALLOC_GDS 0x1B
+#define PACKET3_WRITE_GDS_RAM 0x1C
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_DRAW_INDEX_IMMD 0x2E
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x31
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - tc/l2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+#define PACKET3_MPEG_INDEX 0x3A
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_MEM_WRITE 0x3D
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ * SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR
+ * 1 - GDS
+ */
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ * 2 - DATA
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
+# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_DEST_BASE_0_ENA (1 << 0)
+# define PACKET3_DEST_BASE_1_ENA (1 << 1)
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
+# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
+# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
+# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
+# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
+# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
+# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
+# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
+# define PACKET3_DEST_BASE_2_ENA (1 << 19)
+# define PACKET3_DEST_BASE_3_ENA (1 << 21)
+# define PACKET3_TCL1_ACTION_ENA (1 << 22)
+# define PACKET3_TC_ACTION_ENA (1 << 23)
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - EOP events
+ * 6 - EOS events
+ * 7 - CACHE_FLUSH, CACHE_FLUSH_AND_INV_EVENT
+ */
+#define INV_L2 (1 << 20)
+ /* INV TC L2 cache when EVENT_INDEX = 7 */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit counter value
+ */
+#define INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_ONE_REG_WRITE 0x57
+#define PACKET3_LOAD_CONFIG_REG 0x5F
+#define PACKET3_LOAD_CONTEXT_REG 0x60
+#define PACKET3_LOAD_SH_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_RESOURCE_INDIRECT 0x74
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_ME_WRITE 0x7A
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_CE_WRITE 0x7F
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER 0x87
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SET_CE_DE_COUNTERS 0x89
+#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
+#define PACKET3_SWITCH_BUFFER 0x8B
+
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET 0x200 /* not a register */
+
+#define DMA_RB_CNTL 0x3400
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0x3401
+#define DMA_RB_RPTR 0x3402
+#define DMA_RB_WPTR 0x3403
+
+#define DMA_RB_RPTR_ADDR_HI 0x3407
+#define DMA_RB_RPTR_ADDR_LO 0x3408
+
+#define DMA_IB_CNTL 0x3409
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+# define CMD_VMID_FORCE (1 << 31)
+#define DMA_IB_RPTR 0x340a
+#define DMA_CNTL 0x340b
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0x340d
+# define DMA_IDLE (1 << 0)
+#define DMA_TILING_CONFIG 0x342e
+
+#define DMA_POWER_CNTL 0x342f
+# define MEM_POWER_OVERRIDE (1 << 8)
+#define DMA_CLK_CTRL 0x3430
+
+#define DMA_PG 0x3435
+# define PG_CNTL_ENABLE (1 << 0)
+#define DMA_PGFSM_CONFIG 0x3436
+#define DMA_PGFSM_WRITE 0x3437
+
+#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((b) & 0x1) << 26) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
+ (((vmid) & 0xF) << 20) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
+ (1 << 26) | \
+ (1 << 21) | \
+ (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_POLL_REG_MEM 0xe
+#define DMA_PACKET_NOP 0xf
+
+#define VCE_STATUS 0x20004
+#define VCE_VCPU_CNTL 0x20014
+#define VCE_CLK_EN (1 << 0)
+#define VCE_VCPU_CACHE_OFFSET0 0x20024
+#define VCE_VCPU_CACHE_SIZE0 0x20028
+#define VCE_VCPU_CACHE_OFFSET1 0x2002c
+#define VCE_VCPU_CACHE_SIZE1 0x20030
+#define VCE_VCPU_CACHE_OFFSET2 0x20034
+#define VCE_VCPU_CACHE_SIZE2 0x20038
+#define VCE_SOFT_RESET 0x20120
+#define VCE_ECPU_SOFT_RESET (1 << 0)
+#define VCE_FME_SOFT_RESET (1 << 2)
+#define VCE_RB_BASE_LO2 0x2016c
+#define VCE_RB_BASE_HI2 0x20170
+#define VCE_RB_SIZE2 0x20174
+#define VCE_RB_RPTR2 0x20178
+#define VCE_RB_WPTR2 0x2017c
+#define VCE_RB_BASE_LO 0x20180
+#define VCE_RB_BASE_HI 0x20184
+#define VCE_RB_SIZE 0x20188
+#define VCE_RB_RPTR 0x2018c
+#define VCE_RB_WPTR 0x20190
+#define VCE_CLOCK_GATING_A 0x202f8
+#define VCE_CLOCK_GATING_B 0x202fc
+#define VCE_UENC_CLOCK_GATING 0x205bc
+#define VCE_UENC_REG_CLOCK_GATING 0x205c0
+#define VCE_FW_REG_STATUS 0x20e10
+# define VCE_FW_REG_STATUS_BUSY (1 << 0)
+# define VCE_FW_REG_STATUS_PASS (1 << 3)
+# define VCE_FW_REG_STATUS_DONE (1 << 11)
+#define VCE_LMI_FW_START_KEYSEL 0x20e18
+#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20
+#define VCE_LMI_CTRL2 0x20e74
+#define VCE_LMI_CTRL 0x20e98
+#define VCE_LMI_VM_CTRL 0x20ea0
+#define VCE_LMI_SWAP_CNTL 0x20eb4
+#define VCE_LMI_SWAP_CNTL1 0x20eb8
+#define VCE_LMI_CACHE_CTRL 0x20ef4
+
+#define VCE_CMD_NO_OP 0x00000000
+#define VCE_CMD_END 0x00000001
+#define VCE_CMD_IB 0x00000002
+#define VCE_CMD_FENCE 0x00000003
+#define VCE_CMD_TRAP 0x00000004
+#define VCE_CMD_IB_AUTO 0x00000005
+#define VCE_CMD_SEMAPHORE 0x00000006
+
+
+//#dce stupp
+/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
+#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4
+#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4
+#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4
+#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4
+#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4
+#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4
+
+#define CURSOR_WIDTH 64
+#define CURSOR_HEIGHT 64
+#define AMDGPU_MM_INDEX 0x0000
+#define AMDGPU_MM_DATA 0x0001
+
+#define VERDE_NUM_CRTC 6
+#define BLACKOUT_MODE_MASK 0x00000007
+#define VGA_RENDER_CONTROL 0xC0
+#define R_000300_VGA_RENDER_CONTROL 0xC0
+#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
+#define EVERGREEN_CRTC_STATUS 0x1BA3
+#define EVERGREEN_CRTC_V_BLANK (1 << 0)
+#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
+/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
+#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
+#define EVERGREEN_CRTC_CONTROL 0x1b9c
+#define EVERGREEN_CRTC_MASTER_EN (1 << 0)
+#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
+#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
+#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
+#define EVERGREEN_CRTC_V_BLANK (1 << 0)
+#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
+#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
+#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
+#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
+#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
+#define EVERGREEN_GRPH_UPDATE 0x1a11
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
+#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
+
+#define EVERGREEN_DATA_FORMAT 0x1ac0
+# define EVERGREEN_INTERLEAVE_EN (1 << 0)
+
+#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000
+#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc
+
+#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20)
+#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20)
+#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20)
+#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20)
+
+#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45
+#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845
+
+#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847
+#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47
+
+#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8
+#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8
+#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8
+#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8
+#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8
+#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8
+
+#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4
+#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4
+#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4
+#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4
+#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4
+#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4
+
+#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000
+#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000
+#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000
+#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000
+#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000
+#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000
+
+#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1
+#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100
+
+#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1
+
+#define R600_D1GRPH_SWAP_CONTROL 0x1843
+#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0)
+#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0)
+#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0)
+#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
+
+#define AVIVO_D1VGA_CONTROL 0x00cc
+# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0)
+# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8)
+# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
+# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
+# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
+# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24)
+#define AVIVO_D2VGA_CONTROL 0x00ce
+
+#define R600_BUS_CNTL 0x1508
+# define R600_BIOS_ROM_DIS (1 << 1)
+
+#define R600_ROM_CNTL 0x580
+# define R600_SCK_OVERWRITE (1 << 1)
+# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
+# define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28)
+
+#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1
+
+#define FMT_BIT_DEPTH_CONTROL 0x1bf2
+#define FMT_TRUNCATE_EN (1 << 0)
+#define FMT_TRUNCATE_DEPTH (1 << 4)
+#define FMT_SPATIAL_DITHER_EN (1 << 8)
+#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9)
+#define FMT_SPATIAL_DITHER_DEPTH (1 << 12)
+#define FMT_FRAME_RANDOM_ENABLE (1 << 13)
+#define FMT_RGB_RANDOM_ENABLE (1 << 14)
+#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15)
+#define FMT_TEMPORAL_DITHER_EN (1 << 16)
+#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20)
+#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21)
+#define FMT_TEMPORAL_LEVEL (1 << 24)
+#define FMT_TEMPORAL_DITHER_RESET (1 << 25)
+#define FMT_25FRC_SEL(x) ((x) << 26)
+#define FMT_50FRC_SEL(x) ((x) << 28)
+#define FMT_75FRC_SEL(x) ((x) << 30)
+
+#define EVERGREEN_DC_LUT_CONTROL 0x1a80
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86
+#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c
+#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79
+#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e
+#define EVERGREEN_DC_LUT_RW_MODE 0x1a78
+
+#define EVERGREEN_GRPH_ENABLE 0x1a00
+#define EVERGREEN_GRPH_CONTROL 0x1a01
+#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0)
+#define EVERGREEN_GRPH_DEPTH_8BPP 0
+#define EVERGREEN_GRPH_DEPTH_16BPP 1
+#define EVERGREEN_GRPH_DEPTH_32BPP 2
+#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
+#define EVERGREEN_ADDR_SURF_2_BANK 0
+#define EVERGREEN_ADDR_SURF_4_BANK 1
+#define EVERGREEN_ADDR_SURF_8_BANK 2
+#define EVERGREEN_ADDR_SURF_16_BANK 3
+#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4)
+#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
+#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0
+#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1
+#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2
+#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3
+#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8)
+
+#define EVERGREEN_GRPH_FORMAT_INDEXED 0
+#define EVERGREEN_GRPH_FORMAT_ARGB1555 0
+#define EVERGREEN_GRPH_FORMAT_ARGB565 1
+#define EVERGREEN_GRPH_FORMAT_ARGB4444 2
+#define EVERGREEN_GRPH_FORMAT_AI88 3
+#define EVERGREEN_GRPH_FORMAT_MONO16 4
+#define EVERGREEN_GRPH_FORMAT_BGRA5551 5
+
+/* 32 BPP */
+#define EVERGREEN_GRPH_FORMAT_ARGB8888 0
+#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1
+#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2
+#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3
+#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4
+#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5
+#define EVERGREEN_GRPH_FORMAT_RGB111110 6
+#define EVERGREEN_GRPH_FORMAT_BGR101111 7
+#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
+#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0
+#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1
+#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2
+#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3
+#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5
+#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6
+#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
+#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
+#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0
+#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1
+#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2
+#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
+#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
+
+#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03
+#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
+# define EVERGREEN_GRPH_ENDIAN_NONE 0
+# define EVERGREEN_GRPH_ENDIAN_8IN16 1
+# define EVERGREEN_GRPH_ENDIAN_8IN32 2
+# define EVERGREEN_GRPH_ENDIAN_8IN64 3
+#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
+# define EVERGREEN_GRPH_RED_SEL_R 0
+# define EVERGREEN_GRPH_RED_SEL_G 1
+# define EVERGREEN_GRPH_RED_SEL_B 2
+# define EVERGREEN_GRPH_RED_SEL_A 3
+#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
+# define EVERGREEN_GRPH_GREEN_SEL_G 0
+# define EVERGREEN_GRPH_GREEN_SEL_B 1
+# define EVERGREEN_GRPH_GREEN_SEL_A 2
+# define EVERGREEN_GRPH_GREEN_SEL_R 3
+#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
+# define EVERGREEN_GRPH_BLUE_SEL_B 0
+# define EVERGREEN_GRPH_BLUE_SEL_A 1
+# define EVERGREEN_GRPH_BLUE_SEL_R 2
+# define EVERGREEN_GRPH_BLUE_SEL_G 3
+#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
+# define EVERGREEN_GRPH_ALPHA_SEL_A 0
+# define EVERGREEN_GRPH_ALPHA_SEL_R 1
+# define EVERGREEN_GRPH_ALPHA_SEL_G 2
+# define EVERGREEN_GRPH_ALPHA_SEL_B 3
+
+#define EVERGREEN_D3VGA_CONTROL 0xf8
+#define EVERGREEN_D4VGA_CONTROL 0xf9
+#define EVERGREEN_D5VGA_CONTROL 0xfa
+#define EVERGREEN_D6VGA_CONTROL 0xfb
+
+#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00
+
+#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
+#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8)
+
+#define EVERGREEN_GRPH_PITCH 0x1a06
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
+#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09
+#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a
+#define EVERGREEN_GRPH_X_START 0x1a0b
+#define EVERGREEN_GRPH_Y_START 0x1a0c
+#define EVERGREEN_GRPH_X_END 0x1a0d
+#define EVERGREEN_GRPH_Y_END 0x1a0e
+#define EVERGREEN_GRPH_UPDATE 0x1a11
+#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
+#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
+#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12
+#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0)
+
+#define EVERGREEN_VIEWPORT_START 0x1b5c
+#define EVERGREEN_VIEWPORT_SIZE 0x1b5d
+#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1
+
+/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
+#define EVERGREEN_CUR_CONTROL 0x1a66
+# define EVERGREEN_CURSOR_EN (1 << 0)
+# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8)
+# define EVERGREEN_CURSOR_MONO 0
+# define EVERGREEN_CURSOR_24_1 1
+# define EVERGREEN_CURSOR_24_8_PRE_MULT 2
+# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3
+# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16)
+# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20)
+# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
+# define EVERGREEN_CURSOR_URGENT_ALWAYS 0
+# define EVERGREEN_CURSOR_URGENT_1_8 1
+# define EVERGREEN_CURSOR_URGENT_1_4 2
+# define EVERGREEN_CURSOR_URGENT_3_8 3
+# define EVERGREEN_CURSOR_URGENT_1_2 4
+#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67
+# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000
+#define EVERGREEN_CUR_SIZE 0x1a68
+#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69
+#define EVERGREEN_CUR_POSITION 0x1a6a
+#define EVERGREEN_CUR_HOT_SPOT 0x1a6b
+#define EVERGREEN_CUR_COLOR1 0x1a6c
+#define EVERGREEN_CUR_COLOR2 0x1a6d
+#define EVERGREEN_CUR_UPDATE 0x1a6e
+# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0)
+# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1)
+# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16)
+# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
+
+
+#define NI_INPUT_CSC_CONTROL 0x1a35
+# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0)
+# define NI_INPUT_CSC_BYPASS 0
+# define NI_INPUT_CSC_PROG_COEFF 1
+# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2
+# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4)
+
+#define NI_OUTPUT_CSC_CONTROL 0x1a3c
+# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0)
+# define NI_OUTPUT_CSC_BYPASS 0
+# define NI_OUTPUT_CSC_TV_RGB 1
+# define NI_OUTPUT_CSC_YCBCR_601 2
+# define NI_OUTPUT_CSC_YCBCR_709 3
+# define NI_OUTPUT_CSC_PROG_COEFF 4
+# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4)
+
+#define NI_DEGAMMA_CONTROL 0x1a58
+# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0)
+# define NI_DEGAMMA_BYPASS 0
+# define NI_DEGAMMA_SRGB_24 1
+# define NI_DEGAMMA_XVYCC_222 2
+# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4)
+# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
+# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12)
+
+#define NI_GAMUT_REMAP_CONTROL 0x1a59
+# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0)
+# define NI_GAMUT_REMAP_BYPASS 0
+# define NI_GAMUT_REMAP_PROG_COEFF 1
+# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4)
+
+#define NI_REGAMMA_CONTROL 0x1aa0
+# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0)
+# define NI_REGAMMA_BYPASS 0
+# define NI_REGAMMA_SRGB_24 1
+# define NI_REGAMMA_XVYCC_222 2
+# define NI_REGAMMA_PROG_A 3
+# define NI_REGAMMA_PROG_B 4
+# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4)
+
+
+#define NI_PRESCALE_GRPH_CONTROL 0x1a2d
+# define NI_GRPH_PRESCALE_BYPASS (1 << 4)
+
+#define NI_PRESCALE_OVL_CONTROL 0x1a31
+# define NI_OVL_PRESCALE_BYPASS (1 << 4)
+
+#define NI_INPUT_GAMMA_CONTROL 0x1a10
+# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0)
+# define NI_INPUT_GAMMA_USE_LUT 0
+# define NI_INPUT_GAMMA_BYPASS 1
+# define NI_INPUT_GAMMA_SRGB_24 2
+# define NI_INPUT_GAMMA_XVYCC_222 3
+# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
+
+#define BLACKOUT_MODE_MASK 0x00000007
+#define VGA_RENDER_CONTROL 0xC0
+#define R_000300_VGA_RENDER_CONTROL 0xC0
+#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
+#define EVERGREEN_CRTC_STATUS 0x1BA3
+#define EVERGREEN_CRTC_V_BLANK (1 << 0)
+#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
+/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
+#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
+#define EVERGREEN_CRTC_CONTROL 0x1b9c
+# define EVERGREEN_CRTC_MASTER_EN (1 << 0)
+# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
+#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
+# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
+# define EVERGREEN_CRTC_V_BLANK (1 << 0)
+#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
+#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
+#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
+#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
+#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
+#define EVERGREEN_GRPH_UPDATE 0x1a11
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
+#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
+
+#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10
+#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4
+#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80
+#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7
+#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400
+#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa
+#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000
+#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd
+#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000
+#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10
+#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000
+#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13
+
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000
+#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18
+
+#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7
+#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0
+
+#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1
+#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0
+#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2
+#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1
+
+#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000
+#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11
+#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800
+#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb
+
+#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8
+#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3
+#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40
+#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6
+#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200
+#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9
+#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000
+#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc
+#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000
+#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf
+#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000
+#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12
+
+#define MC_SEQ_MISC0__MT__MASK 0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
+#define MC_SEQ_MISC0__MT__DDR2 0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3 0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4 0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5 0x50000000
+#define MC_SEQ_MISC0__MT__HBM 0x60000000
+#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
+
+#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
+#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
+#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
+#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
+#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
+#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+
+#define CONFIG_CNTL 0x1509
+#define CC_DRM_ID_STRAPS 0X1559
+#define AMDGPU_PCIE_INDEX 0xc
+#define AMDGPU_PCIE_DATA 0xd
+
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412
+#define DMA_MODE 0x342f
+#define DMA_RB_RPTR_ADDR_HI 0x3407
+#define DMA_RB_RPTR_ADDR_LO 0x3408
+#define DMA_BUSY_MASK 0x20
+#define DMA1_BUSY_MASK 0X40
+#define SDMA_MAX_INSTANCE 2
+
+#define PCIE_BUS_CLK 10000
+#define TCLK (PCIE_BUS_CLK / 10)
+#define PCIE_PORT_INDEX 0xe
+#define PCIE_PORT_DATA 0xf
+#define EVERGREEN_PIF_PHY0_INDEX 0x8
+#define EVERGREEN_PIF_PHY0_DATA 0xc
+#define EVERGREEN_PIF_PHY1_INDEX 0x10
+#define EVERGREEN_PIF_PHY1_DATA 0x14
+
+#define MC_VM_FB_OFFSET 0x81a
+
+/* Discrete VCE clocks */
+#define CG_VCEPLL_FUNC_CNTL 0xc0030600
+#define VCEPLL_RESET_MASK 0x00000001
+#define VCEPLL_SLEEP_MASK 0x00000002
+#define VCEPLL_BYPASS_EN_MASK 0x00000004
+#define VCEPLL_CTLREQ_MASK 0x00000008
+#define VCEPLL_VCO_MODE_MASK 0x00000600
+#define VCEPLL_REF_DIV_MASK 0x003F0000
+#define VCEPLL_CTLACK_MASK 0x40000000
+#define VCEPLL_CTLACK2_MASK 0x80000000
+
+#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601
+#define VCEPLL_PDIV_A(x) ((x) << 0)
+#define VCEPLL_PDIV_A_MASK 0x0000007F
+#define VCEPLL_PDIV_B(x) ((x) << 8)
+#define VCEPLL_PDIV_B_MASK 0x00007F00
+#define EVCLK_SRC_SEL(x) ((x) << 20)
+#define EVCLK_SRC_SEL_MASK 0x01F00000
+#define ECCLK_SRC_SEL(x) ((x) << 25)
+#define ECCLK_SRC_SEL_MASK 0x3E000000
+
+#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602
+#define VCEPLL_FB_DIV(x) ((x) << 0)
+#define VCEPLL_FB_DIV_MASK 0x01FFFFFF
+
+#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603
+
+#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604
+#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606
+#define VCEPLL_SSEN_MASK 0x00000001
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
new file mode 100644
index 000000000..8b8086d5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "sienna_cichlid.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+#if 0
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) &&
+ adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
+ return true;
+#endif
+ return amdgpu_reset_method == AMD_RESET_METHOD_MODE2;
+}
+
+static struct amdgpu_reset_handler *
+sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ if (sienna_cichlid_is_mode2_default(reset_ctl)) {
+ list_for_each_entry (handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2)
+ return handler;
+ }
+ }
+
+ return NULL;
+}
+
+static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+
+ if (r) {
+ dev_err(adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ return r;
+}
+
+static int
+sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->gfxhub.funcs->mode2_save_regs)
+ adev->gfxhub.funcs->mode2_save_regs(adev);
+ if (adev->gfxhub.funcs->halt)
+ adev->gfxhub.funcs->halt(adev);
+ r = sienna_cichlid_mode2_suspend_ip(adev);
+ }
+
+ return r;
+}
+
+static void sienna_cichlid_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+static int
+sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int r;
+
+ r = sienna_cichlid_mode2_reset(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "ASIC reset failed with error, %d ", r);
+ }
+ return r;
+}
+
+static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ int i, r;
+ struct psp_context *psp = &adev->psp;
+
+ r = psp_rlc_autoload_start(psp);
+ if (r) {
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
+ return r;
+ }
+
+ /* Reinit GFXHUB */
+ if (adev->gfxhub.funcs->mode2_restore_regs)
+ adev->gfxhub.funcs->mode2_restore_regs(adev);
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ (void *)adev);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = sienna_cichlid_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+
+end:
+ if (r)
+ return -EAGAIN;
+ else
+ return r;
+}
+
+static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext,
+ .perform_reset = sienna_cichlid_mode2_perform_reset,
+ .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = sienna_cichlid_mode2_reset,
+};
+
+int sienna_cichlid_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = sienna_cichlid_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
+
+ INIT_LIST_HEAD(&reset_ctl->reset_handlers);
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
new file mode 100644
index 000000000..5213b162d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SIENNA_CICHLID_H__
+#define __SIENNA_CICHLID_H__
+
+#include "amdgpu.h"
+
+int sienna_cichlid_reset_init(struct amdgpu_device *adev);
+int sienna_cichlid_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
new file mode 100644
index 000000000..dd2d66090
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -0,0 +1,797 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
+
+#include "smu_v11_0_i2c.h"
+#include "amdgpu.h"
+#include "amdgpu_dpm.h"
+#include "soc15_common.h"
+#include <drm/drm_fixed.h>
+#include <drm/drm_drv.h>
+#include "amdgpu_amdkfd.h"
+#include <linux/i2c.h>
+#include <linux/pci.h>
+
+/* error codes */
+#define I2C_OK 0
+#define I2C_NAK_7B_ADDR_NOACK 1
+#define I2C_NAK_TXDATA_NOACK 2
+#define I2C_TIMEOUT 4
+#define I2C_SW_TIMEOUT 8
+#define I2C_ABORT 0x10
+
+#define I2C_X_RESTART BIT(31)
+
+static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT);
+
+ reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0);
+ WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg);
+}
+
+/* The T_I2C_POLL_US is defined as follows:
+ *
+ * "Define a timer interval (t_i2c_poll) equal to 10 times the
+ * signalling period for the highest I2C transfer speed used in the
+ * system and supported by DW_apb_i2c. For instance, if the highest
+ * I2C data transfer mode is 400 kb/s, then t_i2c_poll is 25 us." --
+ * DesignWare DW_apb_i2c Databook, Version 1.21a, section 3.8.3.1,
+ * page 56, with grammar and syntax corrections.
+ *
+ * Vcc for our device is at 1.8V which puts it at 400 kHz,
+ * see Atmel AT24CM02 datasheet, section 8.3 DC Characteristics table, page 14.
+ *
+ * The procedure to disable the IP block is described in section
+ * 3.8.3 Disabling DW_apb_i2c on page 56.
+ */
+#define I2C_SPEED_MODE_FAST 2
+#define T_I2C_POLL_US 25
+#define I2C_MAX_T_POLL_COUNT 1000
+
+static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0);
+
+ if (!enable) {
+ int ii;
+
+ for (ii = I2C_MAX_T_POLL_COUNT; ii > 0; ii--) {
+ u32 en_stat = RREG32_SOC15(SMUIO,
+ 0,
+ mmCKSVII2C_IC_ENABLE_STATUS);
+ if (REG_GET_FIELD(en_stat, CKSVII2C_IC_ENABLE_STATUS, IC_EN))
+ udelay(T_I2C_POLL_US);
+ else
+ return I2C_OK;
+ }
+
+ return I2C_ABORT;
+ }
+
+ return I2C_OK;
+}
+
+static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ /* do */
+ {
+ RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR);
+
+ } /* while (reg_CKSVII2C_ic_clr_intr == 0) */
+}
+
+static void smu_v11_0_i2c_configure(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t reg = 0;
+
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0);
+ /* The values of IC_MAX_SPEED_MODE are,
+ * 1: standard mode, 0 - 100 Kb/s,
+ * 2: fast mode, <= 400 Kb/s, or fast mode plus, <= 1000 Kb/s,
+ * 3: high speed mode, <= 3.4 Mb/s.
+ */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE,
+ I2C_SPEED_MODE_FAST);
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1);
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg);
+}
+
+static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ /*
+ * Standard mode speed, These values are taken from SMUIO MAS,
+ * but are different from what is given is
+ * Synopsys spec. The values here are based on assumption
+ * that refclock is 100MHz
+ *
+ * Configuration for standard mode; Speed = 100kbps
+ * Scale linearly, for now only support standard speed clock
+ * This will work only with 100M ref clock
+ *
+ * TBD:Change the calculation to take into account ref clock values also.
+ */
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20);
+}
+
+static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ /* The IC_TAR::IC_TAR field is 10-bits wide.
+ * It takes a 7-bit or 10-bit addresses as an address,
+ * i.e. no read/write bit--no wire format, just the address.
+ */
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, address & 0x3FF);
+}
+
+static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t ret = I2C_OK;
+ uint32_t reg, reg_c_tx_abrt_source;
+
+ /*Check if transmission is completed */
+ unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ do {
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ break;
+ }
+
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+
+ } while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0);
+
+ if (ret != I2C_OK)
+ return ret;
+
+ /* This only checks if NAK is received and transaction got aborted */
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT);
+
+ if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) {
+ reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
+ DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source);
+
+ /* Check for stop due to NACK */
+ if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_TXDATA_NOACK) == 1) {
+
+ ret |= I2C_NAK_TXDATA_NOACK;
+
+ } else if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_7B_ADDR_NOACK) == 1) {
+
+ ret |= I2C_NAK_7B_ADDR_NOACK;
+ } else {
+ ret |= I2C_ABORT;
+ }
+
+ smu_v11_0_i2c_clear_status(control);
+ }
+
+ return ret;
+}
+
+static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t ret = I2C_OK;
+ uint32_t reg_ic_status, reg_c_tx_abrt_source;
+
+ reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
+
+ /* If slave is not present */
+ if (REG_GET_FIELD(reg_c_tx_abrt_source,
+ CKSVII2C_IC_TX_ABRT_SOURCE,
+ ABRT_7B_ADDR_NOACK) == 1) {
+ ret |= I2C_NAK_7B_ADDR_NOACK;
+
+ smu_v11_0_i2c_clear_status(control);
+ } else { /* wait till some data is there in RXFIFO */
+ /* Poll for some byte in RXFIFO */
+ unsigned long timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ do {
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ break;
+ }
+
+ reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+
+ } while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0);
+ }
+
+ return ret;
+}
+
+/**
+ * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device.
+ *
+ * @control: I2C adapter reference
+ * @address: The I2C address of the slave device.
+ * @data: The data to transmit over the bus.
+ * @numbytes: The amount of data to transmit.
+ * @i2c_flag: Flags for transmission
+ *
+ * Returns 0 on success or error.
+ */
+static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
+ u16 address, u8 *data,
+ u32 numbytes, u32 i2c_flag)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ u32 bytes_sent, reg, ret = I2C_OK;
+ unsigned long timeout_counter;
+
+ bytes_sent = 0;
+
+ DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
+ address, numbytes);
+
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
+ print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
+ 16, 1, data, numbytes, false);
+ }
+
+ /* Set the I2C slave address */
+ smu_v11_0_i2c_set_address(control, address);
+ /* Enable I2C */
+ smu_v11_0_i2c_enable(control, true);
+
+ /* Clear status bits */
+ smu_v11_0_i2c_clear_status(control);
+
+ timeout_counter = jiffies + msecs_to_jiffies(20);
+
+ while (numbytes > 0) {
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+ if (!REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) {
+ /*
+ * We waited for too long for the transmission
+ * FIFO to become not-full. Exit the loop
+ * with error.
+ */
+ if (time_after(jiffies, timeout_counter)) {
+ ret |= I2C_SW_TIMEOUT;
+ goto Err;
+ }
+ } else {
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT,
+ data[bytes_sent]);
+
+ /* Final message, final byte, must generate a
+ * STOP to release the bus, i.e. don't hold
+ * SCL low.
+ */
+ if (numbytes == 1 && i2c_flag & I2C_M_STOP)
+ reg = REG_SET_FIELD(reg,
+ CKSVII2C_IC_DATA_CMD,
+ STOP, 1);
+
+ if (bytes_sent == 0 && i2c_flag & I2C_X_RESTART)
+ reg = REG_SET_FIELD(reg,
+ CKSVII2C_IC_DATA_CMD,
+ RESTART, 1);
+
+ /* Write */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
+
+ /* Record that the bytes were transmitted */
+ bytes_sent++;
+ numbytes--;
+ }
+ }
+
+ ret = smu_v11_0_i2c_poll_tx_status(control);
+Err:
+ /* Any error, no point in proceeding */
+ if (ret != I2C_OK) {
+ if (ret & I2C_SW_TIMEOUT)
+ DRM_ERROR("TIMEOUT ERROR !!!");
+
+ if (ret & I2C_NAK_7B_ADDR_NOACK)
+ DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
+
+
+ if (ret & I2C_NAK_TXDATA_NOACK)
+ DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
+ }
+
+ return ret;
+}
+
+
+/**
+ * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device.
+ *
+ * @control: I2C adapter reference
+ * @address: The I2C address of the slave device.
+ * @data: Placeholder to store received data.
+ * @numbytes: The amount of data to transmit.
+ * @i2c_flag: Flags for transmission
+ *
+ * Returns 0 on success or error.
+ */
+static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
+ u16 address, u8 *data,
+ u32 numbytes, u32 i2c_flag)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t bytes_received, ret = I2C_OK;
+
+ bytes_received = 0;
+
+ /* Set the I2C slave address */
+ smu_v11_0_i2c_set_address(control, address);
+
+ /* Enable I2C */
+ smu_v11_0_i2c_enable(control, true);
+
+ while (numbytes > 0) {
+ uint32_t reg = 0;
+
+ smu_v11_0_i2c_clear_status(control);
+
+ /* Prepare transaction */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0);
+ /* Read */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1);
+
+ /* Final message, final byte, must generate a STOP
+ * to release the bus, i.e. don't hold SCL low.
+ */
+ if (numbytes == 1 && i2c_flag & I2C_M_STOP)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD,
+ STOP, 1);
+
+ if (bytes_received == 0 && i2c_flag & I2C_X_RESTART)
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD,
+ RESTART, 1);
+
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg);
+
+ ret = smu_v11_0_i2c_poll_rx_status(control);
+
+ /* Any error, no point in proceeding */
+ if (ret != I2C_OK) {
+ if (ret & I2C_SW_TIMEOUT)
+ DRM_ERROR("TIMEOUT ERROR !!!");
+
+ if (ret & I2C_NAK_7B_ADDR_NOACK)
+ DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!");
+
+ if (ret & I2C_NAK_TXDATA_NOACK)
+ DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!");
+
+ break;
+ }
+
+ reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD);
+ data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT);
+
+ /* Record that the bytes were received */
+ bytes_received++;
+ numbytes--;
+ }
+
+ DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
+ (uint16_t)address, bytes_received);
+
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
+ print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
+ 16, 1, data, bytes_received, false);
+ }
+
+ return ret;
+}
+
+static void smu_v11_0_i2c_abort(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ uint32_t reg = 0;
+
+ /* Enable I2C engine; */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
+
+ /* Abort previous transaction */
+ reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1);
+ WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg);
+
+ DRM_DEBUG_DRIVER("I2C_Abort() Done.");
+}
+
+static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ const uint32_t IDLE_TIMEOUT = 1024;
+ uint32_t timeout_count = 0;
+ uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity;
+
+ reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
+ reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
+
+ if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) &&
+ (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) {
+ /*
+ * Nobody is using I2C engine, but engine remains active because
+ * someone missed to send STOP
+ */
+ smu_v11_0_i2c_abort(control);
+ } else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) {
+ /* Nobody is using I2C engine */
+ return true;
+ }
+
+ /* Keep reading activity bit until it's cleared */
+ do {
+ reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY);
+
+ if (REG_GET_FIELD(reg_ic_clr_activity,
+ CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0)
+ return true;
+
+ ++timeout_count;
+
+ } while (timeout_count < IDLE_TIMEOUT);
+
+ return false;
+}
+
+static void smu_v11_0_i2c_init(struct i2c_adapter *control)
+{
+ int res;
+
+ /* Disable clock gating */
+ smu_v11_0_i2c_set_clock_gating(control, false);
+
+ if (!smu_v11_0_i2c_activity_done(control))
+ DRM_WARN("I2C busy !");
+
+ /* Disable I2C */
+ res = smu_v11_0_i2c_enable(control, false);
+ if (res != I2C_OK)
+ smu_v11_0_i2c_abort(control);
+
+ /* Configure I2C to operate as master and in standard mode */
+ smu_v11_0_i2c_configure(control);
+
+ /* Initialize the clock to 50 kHz default */
+ smu_v11_0_i2c_set_clock(control);
+
+}
+
+static void smu_v11_0_i2c_fini(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+ u32 status, enable, en_stat;
+ int res;
+
+ res = smu_v11_0_i2c_enable(control, false);
+ if (res != I2C_OK) {
+ status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS);
+ enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE);
+ en_stat = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS);
+
+ /* Nobody is using the I2C engine, yet it remains
+ * active, possibly because someone missed to send
+ * STOP.
+ */
+ DRM_DEBUG_DRIVER("Aborting from fini: status:0x%08x "
+ "enable:0x%08x enable_stat:0x%08x",
+ status, enable, en_stat);
+ smu_v11_0_i2c_abort(control);
+ }
+
+ /* Restore clock gating */
+
+ /*
+ * TODO Reenabling clock gating seems to break subsequent SMU operation
+ * on the I2C bus. My guess is that SMU doesn't disable clock gating like
+ * we do here before working with the bus. So for now just don't restore
+ * it but later work with SMU to see if they have this issue and can
+ * update their code appropriately
+ */
+ /* smu_v11_0_i2c_set_clock_gating(control, true); */
+
+}
+
+static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ /* Send PPSMC_MSG_RequestI2CBus */
+ if (!amdgpu_dpm_smu_i2c_bus_access(adev, true))
+ return true;
+
+ return false;
+}
+
+static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ /* Send PPSMC_MSG_ReleaseI2CBus */
+ if (!amdgpu_dpm_smu_i2c_bus_access(adev, false))
+ return true;
+
+ return false;
+}
+
+/***************************** I2C GLUE ****************************/
+
+static uint32_t smu_v11_0_i2c_read_data(struct i2c_adapter *control,
+ struct i2c_msg *msg, uint32_t i2c_flag)
+{
+ uint32_t ret;
+
+ ret = smu_v11_0_i2c_receive(control, msg->addr, msg->buf, msg->len, i2c_flag);
+
+ if (ret != I2C_OK)
+ DRM_ERROR("ReadData() - I2C error occurred :%x", ret);
+
+ return ret;
+}
+
+static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control,
+ struct i2c_msg *msg, uint32_t i2c_flag)
+{
+ uint32_t ret;
+
+ ret = smu_v11_0_i2c_transmit(control, msg->addr, msg->buf, msg->len, i2c_flag);
+
+ if (ret != I2C_OK)
+ DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret);
+
+ return ret;
+
+}
+
+static void lock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ mutex_lock(&smu_i2c->mutex);
+ if (!smu_v11_0_i2c_bus_lock(i2c))
+ DRM_ERROR("Failed to lock the bus from SMU");
+ else
+ adev->pm.bus_locked = true;
+}
+
+static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ WARN_ONCE(1, "This operation not supposed to run in atomic context!");
+ return false;
+}
+
+static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c);
+ struct amdgpu_device *adev = smu_i2c->adev;
+
+ if (!smu_v11_0_i2c_bus_unlock(i2c))
+ DRM_ERROR("Failed to unlock the bus from SMU");
+ else
+ adev->pm.bus_locked = false;
+ mutex_unlock(&smu_i2c->mutex);
+}
+
+static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = {
+ .lock_bus = lock_bus,
+ .trylock_bus = trylock_bus,
+ .unlock_bus = unlock_bus,
+};
+
+static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msg, int num)
+{
+ int i, ret;
+ u16 addr, dir;
+
+ smu_v11_0_i2c_init(i2c_adap);
+
+ /* From the client's point of view, this sequence of
+ * messages-- the array i2c_msg *msg, is a single transaction
+ * on the bus, starting with START and ending with STOP.
+ *
+ * The client is welcome to send any sequence of messages in
+ * this array, as processing under this function here is
+ * striving to be agnostic.
+ *
+ * Record the first address and direction we see. If either
+ * changes for a subsequent message, generate ReSTART. The
+ * DW_apb_i2c databook, v1.21a, specifies that ReSTART is
+ * generated when the direction changes, with the default IP
+ * block parameter settings, but it doesn't specify if ReSTART
+ * is generated when the address changes (possibly...). We
+ * don't rely on the default IP block parameter settings as
+ * the block is shared and they may change.
+ */
+ if (num > 0) {
+ addr = msg[0].addr;
+ dir = msg[0].flags & I2C_M_RD;
+ }
+
+ for (i = 0; i < num; i++) {
+ u32 i2c_flag = 0;
+
+ if (msg[i].addr != addr || (msg[i].flags ^ dir) & I2C_M_RD) {
+ addr = msg[i].addr;
+ dir = msg[i].flags & I2C_M_RD;
+ i2c_flag |= I2C_X_RESTART;
+ }
+
+ if (i == num - 1) {
+ /* Set the STOP bit on the last message, so
+ * that the IP block generates a STOP after
+ * the last byte of the message.
+ */
+ i2c_flag |= I2C_M_STOP;
+ }
+
+ if (msg[i].flags & I2C_M_RD)
+ ret = smu_v11_0_i2c_read_data(i2c_adap,
+ msg + i,
+ i2c_flag);
+ else
+ ret = smu_v11_0_i2c_write_data(i2c_adap,
+ msg + i,
+ i2c_flag);
+
+ if (ret != I2C_OK) {
+ num = -EIO;
+ break;
+ }
+ }
+
+ smu_v11_0_i2c_fini(i2c_adap);
+ return num;
+}
+
+static u32 smu_v11_0_i2c_func(struct i2c_adapter *adap)
+{
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm smu_v11_0_i2c_algo = {
+ .master_xfer = smu_v11_0_i2c_xfer,
+ .functionality = smu_v11_0_i2c_func,
+};
+
+static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = {
+ .flags = I2C_AQ_NO_ZERO_LEN,
+};
+
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[0];
+ struct i2c_adapter *control = &smu_i2c->adapter;
+ int res;
+
+ smu_i2c->adev = adev;
+ smu_i2c->port = 0;
+ mutex_init(&smu_i2c->mutex);
+ control->owner = THIS_MODULE;
+ control->class = I2C_CLASS_HWMON;
+ control->dev.parent = &adev->pdev->dev;
+ control->algo = &smu_v11_0_i2c_algo;
+ snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0");
+ control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops;
+ control->quirks = &smu_v11_0_i2c_control_quirks;
+ i2c_set_adapdata(control, smu_i2c);
+
+ adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
+ adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
+
+ res = i2c_add_adapter(control);
+ if (res)
+ DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
+
+ return res;
+}
+
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev)
+{
+ struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus;
+
+ i2c_del_adapter(control);
+ adev->pm.ras_eeprom_i2c_bus = NULL;
+ adev->pm.fru_eeprom_i2c_bus = NULL;
+}
+
+/*
+ * Keep this for future unit test if bugs arise
+ */
+#if 0
+#define I2C_TARGET_ADDR 0xA0
+
+bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control)
+{
+
+ uint32_t ret = I2C_OK;
+ uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef};
+
+
+ DRM_INFO("Begin");
+
+ if (!smu_v11_0_i2c_bus_lock(control)) {
+ DRM_ERROR("Failed to lock the bus!.");
+ return false;
+ }
+
+ smu_v11_0_i2c_init(control);
+
+ /* Write 0xde to address 0x0000 on the EEPROM */
+ ret = smu_v11_0_i2c_write_data(control, I2C_TARGET_ADDR, data, 6);
+
+ ret = smu_v11_0_i2c_read_data(control, I2C_TARGET_ADDR, data, 6);
+
+ smu_v11_0_i2c_fini(control);
+
+ smu_v11_0_i2c_bus_unlock(control);
+
+
+ DRM_INFO("End");
+ return true;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
new file mode 100644
index 000000000..96ad14288
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU_V11_I2C_CONTROL_H
+#define SMU_V11_I2C_CONTROL_H
+
+#include <linux/types.h>
+
+struct amdgpu_device;
+
+int smu_v11_0_i2c_control_init(struct amdgpu_device *adev);
+void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
new file mode 100644
index 000000000..ae29620b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "smu_v13_0_10.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+
+static bool smu_v13_0_10_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ if (adev->pm.fw_version >= 0x00502005 && !amdgpu_sriov_vf(adev))
+ return true;
+
+ return false;
+}
+
+static struct amdgpu_reset_handler *
+smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ if (smu_v13_0_10_is_mode2_default(reset_ctl) &&
+ amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) {
+ list_for_each_entry (handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == AMD_RESET_METHOD_MODE2)
+ return handler;
+ }
+ }
+
+ return NULL;
+}
+
+static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES))
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+
+ if (r) {
+ dev_err(adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ return r;
+}
+
+static int
+smu_v13_0_10_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if (!amdgpu_sriov_vf(adev))
+ r = smu_v13_0_10_mode2_suspend_ip(adev);
+
+ return r;
+}
+
+static int smu_v13_0_10_mode2_reset(struct amdgpu_device *adev)
+{
+ return amdgpu_dpm_mode2_reset(adev);
+}
+
+static void smu_v13_0_10_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ list_for_each_entry(handler, &reset_ctl->reset_handlers,
+ handler_list) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+static int
+smu_v13_0_10_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int r;
+
+ r = smu_v13_0_10_mode2_reset(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "ASIC reset failed with error, %d ", r);
+ }
+ return r;
+}
+
+static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ int i, r;
+ struct psp_context *psp = &adev->psp;
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_firmware_info *ucode_list[2];
+ int ucode_count = 0;
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_IMU_I:
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode_list[ucode_count++] = ucode;
+ break;
+ default:
+ break;
+ }
+ }
+
+ r = psp_load_fw_list(psp, ucode_list, ucode_count);
+ if (r) {
+ dev_err(adev->dev, "IMU ucode load failed after mode2 reset\n");
+ return r;
+ }
+
+ r = psp_rlc_autoload_start(psp);
+ if (r) {
+ DRM_ERROR("Failed to start rlc autoload after mode2 reset\n");
+ return r;
+ }
+
+ amdgpu_dpm_enable_gfx_features(adev);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_MES ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ (void *)adev);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS */
+ amdgpu_ras_resume(tmp_adev);
+
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+
+end:
+ if (r)
+ return -EAGAIN;
+ else
+ return r;
+}
+
+static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = smu_v13_0_10_mode2_prepare_hwcontext,
+ .perform_reset = smu_v13_0_10_mode2_perform_reset,
+ .restore_hwcontext = smu_v13_0_10_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = smu_v13_0_10_mode2_reset,
+};
+
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = smu_v13_0_10_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = smu_v13_0_10_get_reset_handler;
+
+ INIT_LIST_HEAD(&reset_ctl->reset_handlers);
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ amdgpu_reset_add_handler(reset_ctl, &smu_v13_0_10_mode2_handler);
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
new file mode 100644
index 000000000..e0cb72a0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SMU_V13_0_10_H__
+#define __SMU_V13_0_10_H__
+
+#include "amdgpu.h"
+
+int smu_v13_0_10_reset_init(struct amdgpu_device *adev);
+int smu_v13_0_10_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
new file mode 100644
index 000000000..acdc40f99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v11_0.h"
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
+
+static u32 smuio_v11_0_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX);
+}
+
+static u32 smuio_v11_0_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA);
+}
+
+static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 def, data;
+
+ /* enable/disable ROM CG is not supported on APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+
+ if (enable)
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (def != data)
+ WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
+}
+
+static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /* CGTT_ROM_CLK_CTRL0 is not available for APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+const struct amdgpu_smuio_funcs smuio_v11_0_funcs = {
+ .get_rom_index_offset = smuio_v11_0_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v11_0_get_rom_data_offset,
+ .update_rom_clock_gating = smuio_v11_0_update_rom_clock_gating,
+ .get_clock_gating_state = smuio_v11_0_get_clock_gating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h
new file mode 100644
index 000000000..43c4262f2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V11_0_H__
+#define __SMUIO_V11_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v11_0_funcs;
+
+#endif /* __SMUIO_V11_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
new file mode 100644
index 000000000..2afeb8b37
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v11_0_6.h"
+#include "smuio/smuio_11_0_6_offset.h"
+#include "smuio/smuio_11_0_6_sh_mask.h"
+
+static u32 smuio_v11_0_6_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX);
+}
+
+static u32 smuio_v11_0_6_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA);
+}
+
+static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 def, data;
+
+ /* enable/disable ROM CG is not supported on APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (def != data)
+ WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
+}
+
+static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /* CGTT_ROM_CLK_CTRL0 is not available for APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+const struct amdgpu_smuio_funcs smuio_v11_0_6_funcs = {
+ .get_rom_index_offset = smuio_v11_0_6_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v11_0_6_get_rom_data_offset,
+ .update_rom_clock_gating = smuio_v11_0_6_update_rom_clock_gating,
+ .get_clock_gating_state = smuio_v11_0_6_get_clock_gating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.h b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.h
new file mode 100644
index 000000000..3c3f4ab0b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V11_0_6_H__
+#define __SMUIO_V11_0_6_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v11_0_6_funcs;
+
+#endif /* __SMUIO_V11_0_6_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
new file mode 100644
index 000000000..13e905c22
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0.h"
+#include "smuio/smuio_13_0_2_offset.h"
+#include "smuio/smuio_13_0_2_sh_mask.h"
+
+#define SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK 0x00000001L
+
+static u32 smuio_v13_0_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v13_0_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 def, data;
+
+ /* enable/disable ROM CG is not supported on APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ def = data = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (def != data)
+ WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data);
+}
+
+static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /* CGTT_ROM_CLK_CTRL0 is not available for APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ data = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+/**
+ * smuio_v13_0_get_die_id - query die id from FCH.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns die id
+ */
+static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
+{
+ u32 data, die_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID);
+
+ return die_id;
+}
+
+/**
+ * smuio_v13_0_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
+ * smuio_v13_0_is_host_gpu_xgmi_supported - detect xgmi interface between cpu and gpu/s.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true on success or false otherwise.
+ */
+static bool smuio_v13_0_is_host_gpu_xgmi_supported(struct amdgpu_device *adev)
+{
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+ /* data[4:0]
+ * bit 0 == 0 host-gpu interface is PCIE
+ * bit 0 == 1 host-gpu interface is Alternate Protocal
+ * for AMD, this is XGMI
+ */
+ data &= SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK;
+
+ return data ? true : false;
+}
+
+const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
+ .get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
+ .get_die_id = smuio_v13_0_get_die_id,
+ .get_socket_id = smuio_v13_0_get_socket_id,
+ .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
+ .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
+ .get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h
new file mode 100644
index 000000000..a3bfe3e4f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_H__
+#define __SMUIO_V13_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_funcs;
+
+#endif /* __SMUIO_V13_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
new file mode 100644
index 000000000..4368a5891
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_3.h"
+#include "soc15_common.h"
+#include "smuio/smuio_13_0_3_offset.h"
+#include "smuio/smuio_13_0_3_sh_mask.h"
+
+#define PKG_TYPE_MASK 0x00000003L
+
+/**
+ * smuio_v13_0_3_get_die_id - query die id from FCH.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns die id
+ */
+static u32 smuio_v13_0_3_get_die_id(struct amdgpu_device *adev)
+{
+ u32 data, die_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID);
+
+ return die_id;
+}
+
+/**
+ * smuio_v13_0_3_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_3_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
+ * smuio_v13_0_3_get_pkg_type - query package type set by MP1/bootcode
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns package type
+ */
+
+static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *adev)
+{
+ enum amdgpu_pkg_type pkg_type;
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE);
+ /* pkg_type[4:0]
+ *
+ * bit 1 == 1 APU form factor
+ *
+ * b0100 - b1111 - Reserved
+ */
+ switch (data & PKG_TYPE_MASK) {
+ case 0x2:
+ pkg_type = AMDGPU_PKG_TYPE_APU;
+ break;
+ default:
+ pkg_type = AMDGPU_PKG_TYPE_UNKNOWN;
+ break;
+ }
+
+ return pkg_type;
+}
+
+
+const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs = {
+ .get_die_id = smuio_v13_0_3_get_die_id,
+ .get_socket_id = smuio_v13_0_3_get_socket_id,
+ .get_pkg_type = smuio_v13_0_3_get_pkg_type,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
new file mode 100644
index 000000000..795f66c5a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_3_H__
+#define __SMUIO_V13_0_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs;
+
+#endif /* __SMUIO_V13_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
new file mode 100644
index 000000000..de998e328
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v13_0_6.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+
+static u32 smuio_v13_0_6_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v13_0_6_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs = {
+ .get_rom_index_offset = smuio_v13_0_6_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v13_0_6_get_rom_data_offset,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
new file mode 100644
index 000000000..c75621de5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_6.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V13_0_6_H__
+#define __SMUIO_V13_0_6_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v13_0_6_funcs;
+
+#endif /* __SMUIO_V13_0_6_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
new file mode 100644
index 000000000..e4e30b9d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v9_0.h"
+#include "smuio/smuio_9_0_offset.h"
+#include "smuio/smuio_9_0_sh_mask.h"
+
+static u32 smuio_v9_0_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX);
+}
+
+static u32 smuio_v9_0_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA);
+}
+
+static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 def, data;
+
+ /* enable/disable ROM CG is not supported on APU */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (def != data)
+ WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
+}
+
+static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+const struct amdgpu_smuio_funcs smuio_v9_0_funcs = {
+ .get_rom_index_offset = smuio_v9_0_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v9_0_get_rom_data_offset,
+ .update_rom_clock_gating = smuio_v9_0_update_rom_clock_gating,
+ .get_clock_gating_state = smuio_v9_0_get_clock_gating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h
new file mode 100644
index 000000000..fc265ce98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V9_0_H__
+#define __SMUIO_V9_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v9_0_funcs;
+
+#endif /* __SMUIO_V9_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
new file mode 100644
index 000000000..3667f9a54
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -0,0 +1,1475 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "uvd/uvd_7_0_offset.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
+#include "nbio/nbio_7_0_sh_mask.h"
+#include "nbio/nbio_7_0_smn.h"
+#include "mp/mp_9_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "gfx_v9_0.h"
+#include "gmc_v9_0.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v1_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
+#include "hdp_v4_0.h"
+#include "vega10_ih.h"
+#include "vega20_ih.h"
+#include "navi10_ih.h"
+#include "sdma_v4_0.h"
+#include "uvd_v7_0.h"
+#include "vce_v4_0.h"
+#include "vcn_v1_0.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
+#include "smuio_v9_0.h"
+#include "smuio_v11_0.h"
+#include "smuio_v13_0.h"
+#include "amdgpu_vkms.h"
+#include "mxgpu_ai.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+#define mmMP0_MISC_CGTT_CTRL0 0x01b9
+#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
+#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba
+#define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX 0
+
+static const struct amd_ip_funcs soc15_common_ip_funcs;
+
+/* Vega, Raven, Arcturus */
+static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static const struct amdgpu_video_codecs vega_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(vega_video_codecs_encode_array),
+ .codec_array = vega_video_codecs_encode_array,
+};
+
+/* Vega */
+static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+};
+
+static const struct amdgpu_video_codecs vega_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(vega_video_codecs_decode_array),
+ .codec_array = vega_video_codecs_decode_array,
+};
+
+/* Raven */
+static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
+};
+
+static const struct amdgpu_video_codecs rv_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(rv_video_codecs_decode_array),
+ .codec_array = rv_video_codecs_decode_array,
+};
+
+/* Renoir, Arcturus */
+static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
+{
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs rn_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(rn_video_codecs_decode_array),
+ .codec_array = rn_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_3_video_codecs_decode_array),
+ .codec_array = vcn_4_0_3_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->ip_versions[VCE_HWIP][0]) {
+ switch (adev->ip_versions[VCE_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 1, 0):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &vega_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &rv_video_codecs_decode;
+ return 0;
+ case IP_VERSION(2, 5, 0):
+ case IP_VERSION(2, 6, 0):
+ case IP_VERSION(2, 2, 0):
+ if (encode)
+ *codecs = &vega_video_codecs_encode;
+ else
+ *codecs = &rn_video_codecs_decode;
+ return 0;
+ case IP_VERSION(4, 0, 3):
+ if (encode)
+ *codecs = &vcn_4_0_3_video_codecs_encode;
+ else
+ *codecs = &vcn_4_0_3_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
+}
+
+static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX);
+ data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA);
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(address, ((reg) & 0x1ff));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void soc15_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX);
+ data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA);
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(address, ((reg) & 0x1ff));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
+static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg));
+ r = RREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA);
+ spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ return r;
+}
+
+static void soc15_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg));
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+}
+
+static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->se_cac_idx_lock, flags);
+ WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg));
+ r = RREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA);
+ spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags);
+ return r;
+}
+
+static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->se_cac_idx_lock, flags);
+ WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg));
+ WREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags);
+}
+
+static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc15_get_xclk(struct amdgpu_device *adev)
+{
+ u32 reference_clock = adev->clock.spll.reference_freq;
+
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) ||
+ adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) ||
+ adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 6))
+ return 10000;
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) ||
+ adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1))
+ return reference_clock / 4;
+
+ return reference_clock;
+}
+
+
+void soc15_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static bool soc15_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+ { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)},
+};
+
+static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc15_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
+ return adev->gfx.config.gb_addr_config;
+ else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))
+ return adev->gfx.config.db_debug2;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
+ en = &soc15_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc15_get_register_value(adev,
+ soc15_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+
+/**
+ * soc15_program_register_sequence - program an array of registers.
+ *
+ * @adev: amdgpu_device pointer
+ * @regs: pointer to the register array
+ * @array_size: size of the register array
+ *
+ * Programs an array or registers with and and or masks.
+ * This is a helper for setting golden registers.
+ */
+
+void soc15_program_register_sequence(struct amdgpu_device *adev,
+ const struct soc15_reg_golden *regs,
+ const u32 array_size)
+{
+ const struct soc15_reg_golden *entry;
+ u32 tmp, reg;
+ int i;
+
+ for (i = 0; i < array_size; ++i) {
+ entry = &regs[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+
+ if (entry->and_mask == 0xffffffff) {
+ tmp = entry->or_mask;
+ } else {
+ tmp = (entry->hwip == GC_HWIP) ?
+ RREG32_SOC15_IP(GC, reg) : RREG32(reg);
+
+ tmp &= ~(entry->and_mask);
+ tmp |= (entry->or_mask & entry->and_mask);
+ }
+
+ if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
+ WREG32_RLC(reg, tmp);
+ else
+ (entry->hwip == GC_HWIP) ?
+ WREG32_SOC15_IP(GC, reg, tmp) : WREG32(reg, tmp);
+
+ }
+
+}
+
+static int soc15_asic_baco_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ /* avoid NBIF got stuck when do RAS recovery in BACO reset */
+ if (ras && adev->ras_enabled)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ ret = amdgpu_dpm_baco_reset(adev);
+ if (ret)
+ return ret;
+
+ /* re-enable doorbell interrupt after BACO exit */
+ if (ras && adev->ras_enabled)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ return 0;
+}
+
+static enum amd_reset_method
+soc15_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset = false;
+ bool connected_to_cpu = false;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)
+ connected_to_cpu = true;
+
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
+ amdgpu_reset_method == AMD_RESET_METHOD_PCI) {
+ /* If connected to cpu, driver only support mode2 */
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
+ return amdgpu_reset_method;
+ }
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ return AMD_RESET_METHOD_MODE2;
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_VEGA20) {
+ if (adev->psp.sos.fw_version >= 0x80067)
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ /*
+ * 1. PMFW version > 0x284300: all cases use baco
+ * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+ */
+ if (ras && adev->ras_enabled &&
+ adev->pm.fw_version <= 0x283400)
+ baco_reset = false;
+ } else {
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ }
+ break;
+ case IP_VERSION(13, 0, 2):
+ /*
+ * 1.connected to cpu: driver issue mode2 reset
+ * 2.discret gpu: driver issue mode1 reset
+ */
+ if (connected_to_cpu)
+ return AMD_RESET_METHOD_MODE2;
+ break;
+ case IP_VERSION(13, 0, 6):
+ /* Use gpu_recovery param to target a reset method.
+ * Enable triggering of GPU reset only if specified
+ * by module parameter.
+ */
+ if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
+ return AMD_RESET_METHOD_MODE2;
+ else if (!(adev->flags & AMD_IS_APU))
+ return AMD_RESET_METHOD_MODE1;
+ else
+ return AMD_RESET_METHOD_MODE2;
+ default:
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+}
+
+static int soc15_asic_reset(struct amdgpu_device *adev)
+{
+ /* original raven doesn't have full asic reset */
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
+ return 0;
+
+ switch (soc15_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ return amdgpu_device_pci_reset(adev);
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ return soc15_asic_baco_reset(adev);
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ return amdgpu_dpm_mode2_reset(adev);
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ return amdgpu_device_mode1_reset(adev);
+ }
+}
+
+static bool soc15_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_VEGA20) {
+ if (adev->psp.sos.fw_version >= 0x80067)
+ return amdgpu_dpm_is_baco_supported(adev);
+ return false;
+ } else {
+ return amdgpu_dpm_is_baco_supported(adev);
+ }
+ break;
+ default:
+ return false;
+ }
+}
+
+/*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
+ u32 cntl_reg, u32 status_reg)
+{
+ return 0;
+}*/
+
+static int soc15_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /*int r;
+
+ r = soc15_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = soc15_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ */
+ return 0;
+}
+
+static int soc15_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+
+ return 0;
+}
+
+static void soc15_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version vega10_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc15_common_ip_funcs,
+};
+
+static void soc15_reg_base_init(struct amdgpu_device *adev)
+{
+ /* Set IP register base before any HW register access */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ vega10_reg_base_init(adev);
+ break;
+ case CHIP_VEGA20:
+ vega20_reg_base_init(adev);
+ break;
+ case CHIP_ARCTURUS:
+ arct_reg_base_init(adev);
+ break;
+ case CHIP_ALDEBARAN:
+ aldebaran_reg_base_init(adev);
+ break;
+ default:
+ DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type);
+ break;
+ }
+}
+
+void soc15_set_virt_ops(struct amdgpu_device *adev)
+{
+ adev->virt.ops = &xgpu_ai_virt_ops;
+
+ /* init soc15 reg base early enough so we can
+ * request request full access for sriov before
+ * set_ip_blocks. */
+ soc15_reg_base_init(adev);
+}
+
+static bool soc15_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we implement soft reset */
+ return true;
+}
+
+static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs */
+ /* Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
+static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs */
+ /* Reg 108 is # of posted requests sent on VG20 */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
+ EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3,
+ EVENT1_SEL, 108);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32);
+}
+
+static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ /* CP hangs in IGT reloading test on RN, reset to WA */
+ if (adev->asic_type == CHIP_RENOIR)
+ return true;
+
+ /* Just return false for soc15 GPUs. Reset does not seem to
+ * be necessary.
+ */
+ if (!amdgpu_passthrough(adev))
+ return false;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
+static void soc15_pre_asic_init(struct amdgpu_device *adev)
+{
+ gmc_v9_0_restore_registers(adev);
+}
+
+static const struct amdgpu_asic_funcs soc15_asic_funcs =
+{
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &vega10_doorbell_index_init,
+ .get_pcie_usage = &soc15_get_pcie_usage,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+};
+
+static const struct amdgpu_asic_funcs vega20_asic_funcs =
+{
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &vega20_doorbell_index_init,
+ .get_pcie_usage = &vega20_get_pcie_usage,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+};
+
+static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
+{
+ .read_disabled_bios = &soc15_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc15_read_register,
+ .reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
+ .get_xclk = &soc15_get_xclk,
+ .set_uvd_clocks = &soc15_set_uvd_clocks,
+ .set_vce_clocks = &soc15_set_vce_clocks,
+ .get_config_memsize = &soc15_get_config_memsize,
+ .need_full_reset = &soc15_need_full_reset,
+ .init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
+ .get_pcie_usage = &amdgpu_nbio_get_pcie_usage,
+ .need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
+ .pre_asic_init = &soc15_pre_asic_init,
+ .query_video_codecs = &soc15_query_video_codecs,
+ .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+};
+
+static int soc15_common_early_init(void *handle)
+{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ }
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
+ adev->didt_rreg = &soc15_didt_rreg;
+ adev->didt_wreg = &soc15_didt_wreg;
+ adev->gc_cac_rreg = &soc15_gc_cac_rreg;
+ adev->gc_cac_wreg = &soc15_gc_cac_wreg;
+ adev->se_cac_rreg = &soc15_se_cac_rreg;
+ adev->se_cac_wreg = &soc15_se_cac_wreg;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xFF;
+ /* TODO: split the GC and PG flags based on the relevant IP version for which
+ * they are relevant.
+ */
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ adev->asic_funcs = &soc15_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_DRM_MGCG |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_DF_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 0x1;
+ break;
+ case IP_VERSION(9, 2, 1):
+ adev->asic_funcs = &soc15_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
+ case IP_VERSION(9, 4, 0):
+ adev->asic_funcs = &vega20_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x28;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ adev->asic_funcs = &soc15_asic_funcs;
+
+ if (adev->rev_id >= 0x8)
+ adev->apu_flags |= AMD_APU_IS_RAVEN2;
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ adev->external_rev_id = adev->rev_id + 0x79;
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ adev->external_rev_id = adev->rev_id + 0x41;
+ else if (adev->rev_id == 1)
+ adev->external_rev_id = adev->rev_id + 0x20;
+ else
+ adev->external_rev_id = adev->rev_id + 0x01;
+
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ } else if (adev->apu_flags & AMD_APU_IS_PICASSO) {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ /*
+ * MMHUB PG needs to be disabled for Picasso for
+ * stability reasons.
+ */
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_VCN;
+ } else {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_DRM_MGCG |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ }
+ break;
+ case IP_VERSION(9, 4, 1):
+ adev->asic_funcs = &vega20_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x32;
+ break;
+ case IP_VERSION(9, 3, 0):
+ adev->asic_funcs = &soc15_asic_funcs;
+
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ adev->external_rev_id = adev->rev_id + 0x91;
+ else
+ adev->external_rev_id = adev->rev_id + 0xa1;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_DF_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->asic_funcs = &vega20_asic_funcs;
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
+ case IP_VERSION(9, 4, 3):
+ adev->asic_funcs = &aqua_vanjaram_asic_funcs;
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_IH_CG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x46;
+ /* GC 9.4.3 uses MMIO register region hole at a different offset */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->rmmio_remap.reg_offset = 0x1A000;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000;
+ }
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_ai_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc15_common_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_ai_mailbox_get_irq(adev);
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc15_common_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_ai_mailbox_add_irq_id(adev);
+
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_init)
+ adev->df.funcs->sw_init(adev);
+
+ return 0;
+}
+
+static int soc15_common_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->df.funcs &&
+ adev->df.funcs->sw_fini)
+ adev->df.funcs->sw_fini(adev);
+ return 0;
+}
+
+static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ /* sdma doorbell range is programed by hypervisor */
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
+ true, adev->doorbell_index.sdma_engine[i] << 1,
+ adev->doorbell_index.sdma_doorbell_range);
+ }
+ }
+}
+
+static int soc15_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* enable aspm */
+ soc15_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->remap_hdp_registers(adev);
+
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ /* HW doorbell routing policy: doorbell writing not
+ * in SDMA/IH/MM/ACV range will be routed to CP. So
+ * we need to init SDMA doorbell range prior
+ * to CP ip block init and ring test. IH already
+ * happens before CP.
+ */
+ soc15_sdma_doorbell_range_init(adev);
+
+ return 0;
+}
+
+static int soc15_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc15_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_ai_mailbox_put_irq(adev);
+
+ if (adev->nbio.ras_if &&
+ amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0;
+}
+
+static int soc15_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc15_common_hw_fini(adev);
+}
+
+static int soc15_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc15_common_hw_init(adev);
+}
+
+static bool soc15_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int soc15_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int soc15_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DRM_MGCG))
+ data &= ~(0x01000000 |
+ 0x02000000 |
+ 0x04000000 |
+ 0x08000000 |
+ 0x10000000 |
+ 0x20000000 |
+ 0x40000000 |
+ 0x80000000);
+ else
+ data |= (0x01000000 |
+ 0x02000000 |
+ 0x04000000 |
+ 0x08000000 |
+ 0x10000000 |
+ 0x20000000 |
+ 0x40000000 |
+ 0x80000000);
+
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0), data);
+}
+
+static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL));
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DRM_LS))
+ data |= 1;
+ else
+ data &= ~1;
+
+ if (def != data)
+ WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
+}
+
+static int soc15_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 2, 0):
+ case IP_VERSION(7, 4, 0):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ soc15_update_drm_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ soc15_update_drm_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->smuio.funcs->update_rom_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->df.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ case IP_VERSION(2, 5, 0):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ soc15_update_drm_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ soc15_update_drm_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2)) {
+
+ /* AMD_CG_SUPPORT_DRM_MGCG */
+ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
+ if (!(data & 0x01000000))
+ *flags |= AMD_CG_SUPPORT_DRM_MGCG;
+
+ /* AMD_CG_SUPPORT_DRM_LS */
+ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL));
+ if (data & 0x1)
+ *flags |= AMD_CG_SUPPORT_DRM_LS;
+ }
+
+ /* AMD_CG_SUPPORT_ROM_MGCG */
+ if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+ adev->smuio.funcs->get_clock_gating_state(adev, flags);
+
+ if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+ adev->df.funcs->get_clockgating_state(adev, flags);
+}
+
+static int soc15_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* todo */
+ return 0;
+}
+
+static const struct amd_ip_funcs soc15_common_ip_funcs = {
+ .name = "soc15_common",
+ .early_init = soc15_common_early_init,
+ .late_init = soc15_common_late_init,
+ .sw_init = soc15_common_sw_init,
+ .sw_fini = soc15_common_sw_fini,
+ .hw_init = soc15_common_hw_init,
+ .hw_fini = soc15_common_hw_fini,
+ .suspend = soc15_common_suspend,
+ .resume = soc15_common_resume,
+ .is_idle = soc15_common_is_idle,
+ .wait_for_idle = soc15_common_wait_for_idle,
+ .soft_reset = soc15_common_soft_reset,
+ .set_clockgating_state = soc15_common_set_clockgating_state,
+ .set_powergating_state = soc15_common_set_powergating_state,
+ .get_clockgating_state= soc15_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
new file mode 100644
index 000000000..eac54042c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SOC15_H__
+#define __SOC15_H__
+
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
+
+extern const struct amdgpu_ip_block_version vega10_common_ip_block;
+
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3
+
+struct soc15_reg_golden {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+ u32 and_mask;
+ u32 or_mask;
+};
+
+struct soc15_reg_rlcg {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+};
+
+struct soc15_reg {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+};
+
+struct soc15_reg_entry {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ uint32_t se_num;
+ uint32_t instance;
+};
+
+struct soc15_allowed_register_entry {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ bool grbm_indexed;
+};
+
+struct soc15_ras_field_entry {
+ const char *name;
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t sec_count_mask;
+ uint32_t sec_count_shift;
+ uint32_t ded_count_mask;
+ uint32_t ded_count_shift;
+};
+
+#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+
+#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+
+#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
+
+#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT
+
+#define SOC15_REG_FIELD_VAL(val, mask, shift) (((val) & mask) >> shift)
+
+#define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, (entry).field##_count_shift)
+
+void soc15_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id);
+void soc15_set_virt_ops(struct amdgpu_device *adev);
+
+void soc15_program_register_sequence(struct amdgpu_device *adev,
+ const struct soc15_reg_golden *registers,
+ const u32 array_size);
+
+int vega10_reg_base_init(struct amdgpu_device *adev);
+int vega20_reg_base_init(struct amdgpu_device *adev);
+int arct_reg_base_init(struct amdgpu_device *adev);
+int aldebaran_reg_base_init(struct amdgpu_device *adev);
+void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+
+void vega10_doorbell_index_init(struct amdgpu_device *adev);
+void vega20_doorbell_index_init(struct amdgpu_device *adev);
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
new file mode 100644
index 000000000..da683afa0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SOC15_COMMON_H__
+#define __SOC15_COMMON_H__
+
+/* GET_INST returns the physical instance corresponding to a logical instance */
+#define GET_INST(ip, inst) \
+ (adev->ip_map.logical_to_dev_inst ? \
+ adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst)
+#define GET_MASK(ip, mask) \
+ (adev->ip_map.logical_to_dev_mask ? \
+ adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask)
+
+/* Register Access Macros */
+#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
+#define SOC15_REG_OFFSET1(ip, inst, reg, offset) \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset))
+
+#define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \
+ WREG32(reg, value))
+
+#define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \
+ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \
+ amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \
+ RREG32(reg))
+
+#define WREG32_FIELD15(ip, idx, reg, field, val) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ 0, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
+ 0, ip##_HWIP, idx)
+
+#define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ 0, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \
+ 0, ip##_HWIP, idx)
+
+#define RREG32_SOC15(ip, inst, reg) \
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ 0, ip##_HWIP, inst)
+
+#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
+
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+
+#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
+
+#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \
+ (offset), 0, ip##_HWIP, inst)
+
+#define WREG32_SOC15(ip, inst, reg, value) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \
+ value, 0, ip##_HWIP, inst)
+
+#define WREG32_SOC15_IP(ip, reg, value) \
+ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
+
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+
+#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
+ value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
+
+#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \
+ value, 0, ip##_HWIP, inst)
+
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)), \
+ #reg, expected_value, mask)
+
+#define SOC15_WAIT_ON_RREG_OFFSET(ip, inst, reg, offset, expected_value, mask) \
+ amdgpu_device_wait_on_rreg(adev, inst, \
+ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg) + (offset)), \
+ #reg, expected_value, mask)
+
+#define WREG32_RLC(reg, value) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0)
+
+#define WREG32_RLC_EX(prefix, reg, value, inst) \
+ do { \
+ if (amdgpu_sriov_fullaccess(adev)) { \
+ uint32_t i = 0; \
+ uint32_t retries = 50000; \
+ uint32_t r0 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \
+ uint32_t r1 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \
+ uint32_t spare_int = adev->reg_offset[GC_HWIP][inst][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \
+ WREG32(r0, value); \
+ WREG32(r1, (reg | 0x80000000)); \
+ WREG32(spare_int, 0x1); \
+ for (i = 0; i < retries; i++) { \
+ u32 tmp = RREG32(r1); \
+ if (!(tmp & 0x80000000)) \
+ break; \
+ udelay(10); \
+ } \
+ if (i >= retries) \
+ pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
+ } else { \
+ WREG32(reg, value); \
+ } \
+ } while (0)
+
+/* shadow the registers in the callback function */
+#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP, inst)
+
+/* for GC only */
+#define RREG32_RLC(reg) \
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP)
+
+#define WREG32_RLC_NO_KIQ(reg, value, hwip) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
+
+#define RREG32_RLC_NO_KIQ(reg, hwip) \
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
+
+#define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ if (amdgpu_sriov_fullaccess(adev)) { \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \
+ if (target_reg == grbm_cntl) \
+ WREG32(r2, value); \
+ else if (target_reg == grbm_idx) \
+ WREG32(r3, value); \
+ WREG32(target_reg, value); \
+ } else { \
+ WREG32(target_reg, value); \
+ } \
+ } while (0)
+
+#define RREG32_SOC15_RLC(ip, inst, reg) \
+ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+#define WREG32_SOC15_RLC(ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP, inst); \
+ } while (0)
+
+#define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\
+ WREG32_RLC_EX(prefix, target_reg, value, inst); \
+ } while (0)
+
+#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+ (__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \
+ AMDGPU_REGS_RLC, ip##_HWIP, idx) & \
+ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
+ AMDGPU_REGS_RLC, ip##_HWIP, idx)
+
+#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
+ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+#define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \
+ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst)
+
+/* inst equals to ext for some IPs */
+#define RREG32_SOC15_EXT(ip, inst, reg, ext) \
+ RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext)) \
+
+#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \
+ WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \
+ + adev->asic_funcs->encode_ext_smn_addressing(ext), \
+ value) \
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
new file mode 100644
index 000000000..2357ff393
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef SOC15_H
+#define SOC15_H
+
+#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_COMPUTE_RINGS 8
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+#define PACKETJ_CONDITION_CHECK0 0
+#define PACKETJ_CONDITION_CHECK1 1
+#define PACKETJ_CONDITION_CHECK2 2
+#define PACKETJ_CONDITION_CHECK3 3
+#define PACKETJ_CONDITION_CHECK4 4
+#define PACKETJ_CONDITION_CHECK5 5
+#define PACKETJ_CONDITION_CHECK6 6
+#define PACKETJ_CONDITION_CHECK7 7
+
+#define PACKETJ_TYPE0 0
+#define PACKETJ_TYPE1 1
+#define PACKETJ_TYPE2 2
+#define PACKETJ_TYPE3 3
+#define PACKETJ_TYPE4 4
+#define PACKETJ_TYPE5 5
+#define PACKETJ_TYPE6 6
+#define PACKETJ_TYPE7 7
+
+#define PACKETJ(reg, r, cond, type) ((reg & 0x3FFFF) | \
+ ((r & 0x3F) << 18) | \
+ ((cond & 0xF) << 24) | \
+ ((type & 0xF) << 28))
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ */
+#define PACKET3_RELEASE_MEM 0x49
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
+#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
+#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
+#define EOP_TCL1_ACTION_EN (1 << 16)
+#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TC_NC_ACTION_EN (1 << 19)
+#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
+
+#define DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+
+
+
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_ACQUIRE_MEM 0x58
+/* 1. HEADER
+ * 2. COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 3. COHER_SIZE [31:0]
+ * 4. COHER_SIZE_HI [7:0]
+ * 5. COHER_BASE_LO [31:0]
+ * 6. COHER_BASE_HI [23:0]
+ * 7. POLL_INTERVAL [15:0]
+ */
+/* COHER_CNTL fields for CP_COHER_CNTL */
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
+#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
+#define PACKET3_REWIND 0x59
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_TMZ (1 << 0)
+# define FRAME_CMD(x) ((x) << 28)
+ /*
+ * x=0: tmz_begin
+ * x=1: tmz_end
+ */
+
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
+#define PACKET3_SET_RESOURCES 0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
+# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
+#define PACKET3_MAP_QUEUES 0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
+# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
+# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
+# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
+# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
+# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2 */
+# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
+# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+#define PACKET3_UNMAP_QUEUES 0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
+ /* 0 - PREEMPT_QUEUES
+ * 1 - RESET_QUEUES
+ * 2 - DISABLE_PROCESS_QUEUES
+ * 3 - PREEMPT_QUEUES_NO_UNMAP
+ */
+# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2a */
+# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
+/* CONTROL4 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define PACKET3_QUERY_STATUS 0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
+# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
+# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
+/* CONTROL2a */
+# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+
+
+#define VCE_CMD_NO_OP 0x00000000
+#define VCE_CMD_END 0x00000001
+#define VCE_CMD_IB 0x00000002
+#define VCE_CMD_FENCE 0x00000003
+#define VCE_CMD_TRAP 0x00000004
+#define VCE_CMD_IB_AUTO 0x00000005
+#define VCE_CMD_SEMAPHORE 0x00000006
+
+#define VCE_CMD_IB_VM 0x00000102
+#define VCE_CMD_WAIT_GE 0x00000106
+#define VCE_CMD_UPDATE_PTB 0x00000107
+#define VCE_CMD_FLUSH_TLB 0x00000108
+#define VCE_CMD_REG_WRITE 0x00000109
+#define VCE_CMD_REG_WAIT 0x0000010a
+
+#define HEVC_ENC_CMD_NO_OP 0x00000000
+#define HEVC_ENC_CMD_END 0x00000001
+#define HEVC_ENC_CMD_FENCE 0x00000003
+#define HEVC_ENC_CMD_TRAP 0x00000004
+#define HEVC_ENC_CMD_IB_VM 0x00000102
+#define HEVC_ENC_CMD_REG_WRITE 0x00000109
+#define HEVC_ENC_CMD_REG_WAIT 0x0000010a
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
new file mode 100644
index 000000000..8b2ff2b28
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -0,0 +1,896 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "mp/mp_13_0_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc21_common_ip_funcs;
+
+/* SOC21 */
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+/* SRIOV SOC21, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
+ .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
+ .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
+static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (adev->ip_versions[UVD_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ if (amdgpu_sriov_vf(adev)) {
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0;
+ }
+ } else {
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+ } else {
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ }
+ }
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 soc21_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc21_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static bool soc21_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static struct soc15_allowed_register_entry soc21_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc21_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc21_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) && adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc21_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc21_allowed_read_registers); i++) {
+ en = &soc21_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc21_get_register_value(adev,
+ soc21_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+#if 0
+static int soc21_asic_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+#endif
+
+static enum amd_reset_method
+soc21_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ return AMD_RESET_METHOD_MODE1;
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 11):
+ return AMD_RESET_METHOD_MODE2;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc21_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (soc21_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static int soc21_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+static int soc21_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+static void soc21_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version soc21_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc21_common_ip_funcs,
+};
+
+static bool soc21_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static void soc21_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void soc21_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs soc21_asic_funcs = {
+ .read_disabled_bios = &soc21_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc21_read_register,
+ .reset = &soc21_asic_reset,
+ .reset_method = &soc21_asic_reset_method,
+ .get_xclk = &soc21_get_xclk,
+ .set_uvd_clocks = &soc21_set_uvd_clocks,
+ .set_vce_clocks = &soc21_set_vce_clocks,
+ .get_config_memsize = &soc21_get_config_memsize,
+ .init_doorbell_index = &soc21_init_doorbell_index,
+ .need_full_reset = &soc21_need_full_reset,
+ .need_reset_on_init = &soc21_need_reset_on_init,
+ .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc21_pre_asic_init,
+ .query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
+};
+
+static int soc21_common_early_init(void *handle)
+{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &soc21_didt_rreg;
+ adev->didt_wreg = &soc21_didt_wreg;
+
+ adev->asic_funcs = &soc21_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update
+ break;
+ case IP_VERSION(11, 0, 2):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 0, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x20;
+ break;
+ case IP_VERSION(11, 0, 4):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x80;
+ break;
+
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc21_common_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+ !amdgpu_sriov_is_av1_support(adev)) {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1));
+ } else {
+ amdgpu_virt_update_sriov_video_codec(adev,
+ sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+ sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+ ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0));
+ }
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* don't need to fail gpu late init
+ * if enabling athub_err_event interrupt failed
+ * nbio v4_3 only support fatal error hanlding
+ * just enable the interrupt directly */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ /* Enable selfring doorbell aperture late because doorbell BAR
+ * aperture will change if resize BAR successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc21_common_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int soc21_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* enable aspm */
+ soc21_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->remap_hdp_registers(adev);
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc21_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc21_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0;
+}
+
+static int soc21_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc21_common_hw_fini(adev);
+}
+
+static int soc21_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc21_common_hw_init(adev);
+}
+
+static bool soc21_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int soc21_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ case IP_VERSION(7, 7, 0):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int soc21_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->ip_versions[LSDMA_HWIP][0]) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ adev->lsdma.funcs->update_memory_power_gating(adev,
+ state == AMD_PG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void soc21_common_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ return;
+}
+
+static const struct amd_ip_funcs soc21_common_ip_funcs = {
+ .name = "soc21_common",
+ .early_init = soc21_common_early_init,
+ .late_init = soc21_common_late_init,
+ .sw_init = soc21_common_sw_init,
+ .sw_fini = soc21_common_sw_fini,
+ .hw_init = soc21_common_hw_init,
+ .hw_fini = soc21_common_hw_fini,
+ .suspend = soc21_common_suspend,
+ .resume = soc21_common_resume,
+ .is_idle = soc21_common_is_idle,
+ .wait_for_idle = soc21_common_wait_for_idle,
+ .soft_reset = soc21_common_soft_reset,
+ .set_clockgating_state = soc21_common_set_clockgating_state,
+ .set_powergating_state = soc21_common_set_powergating_state,
+ .get_clockgating_state = soc21_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.h b/drivers/gpu/drm/amd/amdgpu/soc21.h
new file mode 100644
index 000000000..4c8067af1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC21_H__
+#define __SOC21_H__
+
+extern const struct amdgpu_ip_block_version soc21_common_ip_block;
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_rap_if.h b/drivers/gpu/drm/amd/amdgpu/ta_rap_if.h
new file mode 100644
index 000000000..f14833fae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ta_rap_if.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _TA_RAP_IF_H
+#define _TA_RAP_IF_H
+
+/* Responses have bit 31 set */
+#define RSP_ID_MASK (1U << 31)
+#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+
+enum ta_rap_status {
+ TA_RAP_STATUS__SUCCESS = 1,
+ TA_RAP_STATUS__ERROR_GENERIC_FAILURE = 2,
+ TA_RAP_STATUS__ERROR_CMD_NOT_SUPPORTED = 3,
+ TA_RAP_STATUS__ERROR_INVALID_VALIDATION_METHOD = 4,
+ TA_RAP_STATUS__ERROR_NULL_POINTER = 5,
+ TA_RAP_STATUS__ERROR_NOT_INITIALIZED = 6,
+ TA_RAP_STATUS__ERROR_VALIDATION_FAILED = 7,
+ TA_RAP_STATUS__ERROR_ASIC_NOT_SUPPORTED = 8,
+ TA_RAP_STATUS__ERROR_OPERATION_NOT_PERMISSABLE = 9,
+ TA_RAP_STATUS__ERROR_ALREADY_INIT = 10,
+};
+
+enum ta_rap_cmd {
+ TA_CMD_RAP__INITIALIZE = 1,
+ TA_CMD_RAP__VALIDATE_L0 = 2,
+};
+
+enum ta_rap_validation_method {
+ METHOD_A = 1,
+};
+
+struct ta_rap_cmd_input_data {
+ uint8_t reserved[8];
+};
+
+struct ta_rap_cmd_output_data {
+ uint32_t last_subsection;
+ uint32_t num_total_validate;
+ uint32_t num_valid;
+ uint32_t last_validate_addr;
+ uint32_t last_validate_val;
+ uint32_t last_validate_val_exptd;
+};
+
+union ta_rap_cmd_input {
+ struct ta_rap_cmd_input_data input;
+};
+
+union ta_rap_cmd_output {
+ struct ta_rap_cmd_output_data output;
+};
+
+struct ta_rap_shared_memory {
+ uint32_t cmd_id;
+ uint32_t validation_method_id;
+ uint32_t resp_id;
+ enum ta_rap_status rap_status;
+ union ta_rap_cmd_input rap_in_message;
+ union ta_rap_cmd_output rap_out_message;
+ uint8_t reserved[64];
+};
+
+#endif // #define _TA_RAP_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
new file mode 100644
index 000000000..879bb7af2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _TA_RAS_IF_H
+#define _TA_RAS_IF_H
+
+#define RAS_TA_HOST_IF_VER 0
+
+/* Responses have bit 31 set */
+#define RSP_ID_MASK (1U << 31)
+#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+
+/* RAS related enumerations */
+/**********************************************************/
+enum ras_command {
+ TA_RAS_COMMAND__ENABLE_FEATURES = 0,
+ TA_RAS_COMMAND__DISABLE_FEATURES,
+ TA_RAS_COMMAND__TRIGGER_ERROR,
+};
+
+enum ta_ras_status {
+ TA_RAS_STATUS__SUCCESS = 0x0000,
+ TA_RAS_STATUS__RESET_NEEDED = 0xA001,
+ TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0xA002,
+ TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0xA003,
+ TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0xA004,
+ TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0xA005,
+ TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0xA006,
+ TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0xA007,
+ TA_RAS_STATUS__ERROR_TIMEOUT = 0xA008,
+ TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0XA009,
+ TA_RAS_STATUS__ERROR_GENERIC = 0xA00A,
+ TA_RAS_STATUS__ERROR_RAS_MMHUB_INIT = 0xA00B,
+ TA_RAS_STATUS__ERROR_GET_DEV_INFO = 0xA00C,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_DEV = 0xA00D,
+ TA_RAS_STATUS__ERROR_NOT_INITIALIZED = 0xA00E,
+ TA_RAS_STATUS__ERROR_TEE_INTERNAL = 0xA00F,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_FUNCTION = 0xA010,
+ TA_RAS_STATUS__ERROR_SYS_DRV_REG_ACCESS = 0xA011,
+ TA_RAS_STATUS__ERROR_RAS_READ_WRITE = 0xA012,
+ TA_RAS_STATUS__ERROR_NULL_PTR = 0xA013,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_IP = 0xA014,
+ TA_RAS_STATUS__ERROR_PCS_STATE_QUIET = 0xA015,
+ TA_RAS_STATUS__ERROR_PCS_STATE_ERROR = 0xA016,
+ TA_RAS_STATUS__ERROR_PCS_STATE_HANG = 0xA017,
+ TA_RAS_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018,
+ TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019,
+ TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED = 0xA01A
+};
+
+enum ta_ras_block {
+ TA_RAS_BLOCK__UMC = 0,
+ TA_RAS_BLOCK__SDMA,
+ TA_RAS_BLOCK__GFX,
+ TA_RAS_BLOCK__MMHUB,
+ TA_RAS_BLOCK__ATHUB,
+ TA_RAS_BLOCK__PCIE_BIF,
+ TA_RAS_BLOCK__HDP,
+ TA_RAS_BLOCK__XGMI_WAFL,
+ TA_RAS_BLOCK__DF,
+ TA_RAS_BLOCK__SMN,
+ TA_RAS_BLOCK__SEM,
+ TA_RAS_BLOCK__MP0,
+ TA_RAS_BLOCK__MP1,
+ TA_RAS_BLOCK__FUSE,
+ TA_RAS_BLOCK__MCA,
+ TA_RAS_BLOCK__VCN,
+ TA_RAS_BLOCK__JPEG,
+ TA_NUM_BLOCK_MAX
+};
+
+enum ta_ras_mca_block {
+ TA_RAS_MCA_BLOCK__MP0 = 0,
+ TA_RAS_MCA_BLOCK__MP1 = 1,
+ TA_RAS_MCA_BLOCK__MPIO = 2,
+ TA_RAS_MCA_BLOCK__IOHC = 3,
+ TA_MCA_NUM_BLOCK_MAX
+};
+
+enum ta_ras_error_type {
+ TA_RAS_ERROR__NONE = 0,
+ TA_RAS_ERROR__PARITY = 1,
+ TA_RAS_ERROR__SINGLE_CORRECTABLE = 2,
+ TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
+ TA_RAS_ERROR__POISON = 8,
+};
+
+/* Input/output structures for RAS commands */
+/**********************************************************/
+
+struct ta_ras_enable_features_input {
+ enum ta_ras_block block_id;
+ enum ta_ras_error_type error_type;
+};
+
+struct ta_ras_disable_features_input {
+ enum ta_ras_block block_id;
+ enum ta_ras_error_type error_type;
+};
+
+struct ta_ras_trigger_error_input {
+ enum ta_ras_block block_id; // ras-block. i.e. umc, gfx
+ enum ta_ras_error_type inject_error_type; // type of error. i.e. single_correctable
+ uint32_t sub_block_index; // mem block. i.e. hbm, sram etc.
+ uint64_t address; // explicit address of error
+ uint64_t value; // method if error injection. i.e persistent, coherent etc.
+};
+
+struct ta_ras_init_flags {
+ uint8_t poison_mode_en;
+ uint8_t dgpu_mode;
+ uint16_t xcc_mask;
+ uint8_t channel_dis_num;
+};
+
+struct ta_ras_output_flags {
+ uint8_t ras_init_success_flag;
+ uint8_t err_inject_switch_disable_flag;
+ uint8_t reg_access_failure_flag;
+};
+
+/* Common input structure for RAS callbacks */
+/**********************************************************/
+union ta_ras_cmd_input {
+ struct ta_ras_init_flags init_flags;
+ struct ta_ras_enable_features_input enable_features;
+ struct ta_ras_disable_features_input disable_features;
+ struct ta_ras_trigger_error_input trigger_error;
+
+ uint32_t reserve_pad[256];
+};
+
+union ta_ras_cmd_output {
+ struct ta_ras_output_flags flags;
+
+ uint32_t reserve_pad[256];
+};
+
+/* Shared Memory structures */
+/**********************************************************/
+struct ta_ras_shared_memory {
+ uint32_t cmd_id;
+ uint32_t resp_id;
+ uint32_t ras_status;
+ uint32_t if_version;
+ union ta_ras_cmd_input ras_in_message;
+ union ta_ras_cmd_output ras_out_message;
+};
+
+#endif // TL_RAS_IF_H_
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
new file mode 100644
index 000000000..00d8bdb82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _TA_SECUREDISPLAY_IF_H
+#define _TA_SECUREDISPLAY_IF_H
+
+/** Secure Display related enumerations */
+/**********************************************************/
+
+/** @enum ta_securedisplay_command
+ * Secure Display Command ID
+ */
+enum ta_securedisplay_command {
+ /* Query whether TA is responding used only for validation purpose */
+ TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
+ /* Send region of Interest and CRC value to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* Maximum Command ID */
+ TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
+};
+
+/** @enum ta_securedisplay_status
+ * Secure Display status returns in shared buffer status
+ */
+enum ta_securedisplay_status {
+ TA_SECUREDISPLAY_STATUS__SUCCESS = 0x00, /* Success */
+ TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE = 0x01, /* Generic Failure */
+ TA_SECUREDISPLAY_STATUS__INVALID_PARAMETER = 0x02, /* Invalid Parameter */
+ TA_SECUREDISPLAY_STATUS__NULL_POINTER = 0x03, /* Null Pointer*/
+ TA_SECUREDISPLAY_STATUS__I2C_WRITE_ERROR = 0x04, /* Fail to Write to I2C */
+ TA_SECUREDISPLAY_STATUS__READ_DIO_SCRATCH_ERROR = 0x05, /*Fail Read DIO Scratch Register*/
+ TA_SECUREDISPLAY_STATUS__READ_CRC_ERROR = 0x06, /* Fail to Read CRC*/
+ TA_SECUREDISPLAY_STATUS__I2C_INIT_ERROR = 0x07, /* Failed to initialize I2C */
+
+ TA_SECUREDISPLAY_STATUS__MAX = 0x7FFFFFFF,/* Maximum Value for status*/
+};
+
+/** @enum ta_securedisplay_phy_ID
+ * Physical ID number to use for reading corresponding DIO Scratch register for ROI
+ */
+enum ta_securedisplay_phy_ID {
+ TA_SECUREDISPLAY_PHY0 = 0,
+ TA_SECUREDISPLAY_PHY1 = 1,
+ TA_SECUREDISPLAY_PHY2 = 2,
+ TA_SECUREDISPLAY_PHY3 = 3,
+ TA_SECUREDISPLAY_MAX_PHY = 4,
+};
+
+/** @enum ta_securedisplay_ta_query_cmd_ret
+ * A predefined specific reteurn value which is 0xAB only used to validate
+ * communication to Secure Display TA is functional.
+ * This value is used to validate whether TA is responding successfully
+ */
+enum ta_securedisplay_ta_query_cmd_ret {
+ /* This is a value to validate if TA is loaded successfully */
+ TA_SECUREDISPLAY_QUERY_CMD_RET = 0xAB,
+};
+
+/** @enum ta_securedisplay_buffer_size
+ * I2C Buffer size which contains 8 bytes of ROI (X start, X end, Y start, Y end)
+ * and 6 bytes of CRC( R,G,B) and 1 byte for physical ID
+ */
+enum ta_securedisplay_buffer_size {
+ /* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
+ TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+};
+
+/** Input/output structures for Secure Display commands */
+/**********************************************************/
+/**
+ * Input structures
+ */
+
+/** @struct ta_securedisplay_send_roi_crc_input
+ * Physical ID to determine which DIO scratch register should be used to get ROI
+ */
+struct ta_securedisplay_send_roi_crc_input {
+ uint32_t phy_id; /* Physical ID */
+};
+
+/** @union ta_securedisplay_cmd_input
+ * Input buffer
+ */
+union ta_securedisplay_cmd_input {
+ /* send ROI and CRC input buffer format */
+ struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ uint32_t reserved[4];
+};
+
+/**
+ * Output structures
+ */
+
+/** @struct ta_securedisplay_query_ta_output
+ * Output buffer format for query TA whether TA is responding used only for validation purpose
+ */
+struct ta_securedisplay_query_ta_output {
+ /* return value from TA when it is queried for validation purpose only */
+ uint32_t query_cmd_ret;
+};
+
+/** @struct ta_securedisplay_send_roi_crc_output
+ * Output buffer format for send ROI CRC command which will pass I2c buffer created inside TA
+ * and used to write to I2C used only for validation purpose
+ */
+struct ta_securedisplay_send_roi_crc_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_I2C_BUFFER_SIZE]; /* I2C buffer */
+ uint8_t reserved;
+};
+
+/** @union ta_securedisplay_cmd_output
+ * Output buffer
+ */
+union ta_securedisplay_cmd_output {
+ /* Query TA output buffer format used only for validation purpose*/
+ struct ta_securedisplay_query_ta_output query_ta;
+ /* Send ROI CRC output buffer format used only for validation purpose */
+ struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ uint32_t reserved[4];
+};
+
+/** @struct ta_securedisplay_cmd
+* Secure display command which is shared buffer memory
+*/
+struct ta_securedisplay_cmd {
+ uint32_t cmd_id; /**< +0 Bytes Command ID */
+ enum ta_securedisplay_status status; /**< +4 Bytes Status code returned by the secure display TA */
+ uint32_t reserved[2]; /**< +8 Bytes Reserved */
+ union ta_securedisplay_cmd_input securedisplay_in_message; /**< +16 Bytes Command input buffer */
+ union ta_securedisplay_cmd_output securedisplay_out_message; /**< +32 Bytes Command output buffer */
+ /**@note Total 48 Bytes */
+};
+
+#endif //_TA_SECUREDISPLAY_IF_H
+
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
new file mode 100644
index 000000000..da815a93d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _TA_XGMI_IF_H
+#define _TA_XGMI_IF_H
+
+/* Responses have bit 31 set */
+#define RSP_ID_MASK (1U << 31)
+#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+
+enum ta_command_xgmi {
+ TA_COMMAND_XGMI__INITIALIZE = 0x00,
+ TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+ TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
+ TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
+ TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04,
+ TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B
+};
+
+/* XGMI related enumerations */
+/**********************************************************/;
+enum ta_xgmi_connected_nodes {
+ TA_XGMI__MAX_CONNECTED_NODES = 64
+};
+
+enum ta_xgmi_status {
+ TA_XGMI_STATUS__SUCCESS = 0x00,
+ TA_XGMI_STATUS__GENERIC_FAILURE = 0x01,
+ TA_XGMI_STATUS__NULL_POINTER = 0x02,
+ TA_XGMI_STATUS__INVALID_PARAMETER = 0x03,
+ TA_XGMI_STATUS__NOT_INITIALIZED = 0x04,
+ TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05,
+ TA_XGMI_STATUS__INVALID_NODE_ID = 0x06,
+ TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07,
+ TA_XGMI_STATUS__FAILED_ID_GEN = 0x08,
+ TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09,
+ TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A
+};
+
+enum ta_xgmi_assigned_sdma_engine {
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4,
+ TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5
+};
+
+/* input/output structures for XGMI commands */
+/**********************************************************/
+struct ta_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+
+struct ta_xgmi_peer_link_info {
+ uint64_t node_id;
+ uint8_t num_links;
+};
+
+struct ta_xgmi_cmd_initialize_output {
+ uint32_t status;
+};
+
+struct ta_xgmi_cmd_get_node_id_output {
+ uint64_t node_id;
+};
+
+struct ta_xgmi_cmd_get_hive_id_output {
+ uint64_t hive_id;
+};
+
+struct ta_xgmi_cmd_get_topology_info_input {
+ uint32_t num_nodes;
+ struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_get_topology_info_output {
+ uint32_t num_nodes;
+ struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_get_peer_link_info_output {
+ uint32_t num_nodes;
+ struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_set_topology_info_input {
+ uint32_t num_nodes;
+ struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+/**********************************************************/
+/* Common input structure for XGMI callbacks */
+union ta_xgmi_cmd_input {
+ struct ta_xgmi_cmd_get_topology_info_input get_topology_info;
+ struct ta_xgmi_cmd_set_topology_info_input set_topology_info;
+};
+
+/* Common output structure for XGMI callbacks */
+union ta_xgmi_cmd_output {
+ struct ta_xgmi_cmd_initialize_output initialize;
+ struct ta_xgmi_cmd_get_node_id_output get_node_id;
+ struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
+ struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
+ struct ta_xgmi_cmd_get_peer_link_info_output get_link_info;
+};
+/**********************************************************/
+
+struct ta_xgmi_shared_memory {
+ uint32_t cmd_id;
+ uint32_t resp_id;
+ enum ta_xgmi_status xgmi_status;
+ uint8_t flag_extend_link_record;
+ uint8_t reserved0[3];
+ union ta_xgmi_cmd_input xgmi_in_message;
+ union ta_xgmi_cmd_output xgmi_out_message;
+};
+
+#endif //_TA_XGMI_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
new file mode 100644
index 000000000..917707bba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "vid.h"
+
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#include "bif/bif_5_1_d.h"
+#include "bif/bif_5_1_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
+
+static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * tonga_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (VI).
+ */
+static void tonga_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * tonga_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (VI).
+ */
+static void tonga_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * tonga_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int tonga_ih_irq_init(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl, ih_rb_cntl, ih_doorbell_rtpr;
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int rb_bufsz;
+
+ /* disable irqs */
+ tonga_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(mmIH_RB_BASE, ih->gpu_addr >> 8);
+
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+ ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ if (adev->irq.msi_enabled)
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 1);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ ih_doorbell_rtpr = RREG32(mmIH_DOORBELL_RPTR);
+ if (adev->irq.ih.use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
+ OFFSET, adev->irq.ih.doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ WREG32(mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ tonga_ih_enable_interrupts(adev);
+
+ return 0;
+}
+
+/**
+ * tonga_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VI).
+ */
+static void tonga_ih_irq_disable(struct amdgpu_device *adev)
+{
+ tonga_ih_disable_interrupts(adev);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * tonga_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VI). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by cz_irq_process(VI).
+ * Returns the value of the wptr.
+ */
+static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32(mmIH_RB_WPTR);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 16). Hopefully
+ * this should allow us to catchup.
+ */
+
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * tonga_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to decode
+ * @entry: IV entry to place decoded information into
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position.
+ */
+static void tonga_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * tonga_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void tonga_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ } else {
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+ }
+}
+
+static int tonga_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ tonga_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int tonga_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int tonga_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int tonga_ih_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = tonga_ih_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int tonga_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ tonga_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int tonga_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return tonga_ih_hw_fini(adev);
+}
+
+static int tonga_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return tonga_ih_hw_init(adev);
+}
+
+static bool tonga_ih_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return false;
+
+ return true;
+}
+
+static int tonga_ih_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (!REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static bool tonga_ih_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_IH, 1);
+
+ if (srbm_soft_reset) {
+ adev->irq.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->irq.srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int tonga_ih_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->irq.srbm_soft_reset)
+ return 0;
+
+ return tonga_ih_hw_fini(adev);
+}
+
+static int tonga_ih_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->irq.srbm_soft_reset)
+ return 0;
+
+ return tonga_ih_hw_init(adev);
+}
+
+static int tonga_ih_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->irq.srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->irq.srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int tonga_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int tonga_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs tonga_ih_ip_funcs = {
+ .name = "tonga_ih",
+ .early_init = tonga_ih_early_init,
+ .late_init = NULL,
+ .sw_init = tonga_ih_sw_init,
+ .sw_fini = tonga_ih_sw_fini,
+ .hw_init = tonga_ih_hw_init,
+ .hw_fini = tonga_ih_hw_fini,
+ .suspend = tonga_ih_suspend,
+ .resume = tonga_ih_resume,
+ .is_idle = tonga_ih_is_idle,
+ .wait_for_idle = tonga_ih_wait_for_idle,
+ .check_soft_reset = tonga_ih_check_soft_reset,
+ .pre_soft_reset = tonga_ih_pre_soft_reset,
+ .soft_reset = tonga_ih_soft_reset,
+ .post_soft_reset = tonga_ih_post_soft_reset,
+ .set_clockgating_state = tonga_ih_set_clockgating_state,
+ .set_powergating_state = tonga_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs tonga_ih_funcs = {
+ .get_wptr = tonga_ih_get_wptr,
+ .decode_iv = tonga_ih_decode_iv,
+ .set_rptr = tonga_ih_set_rptr
+};
+
+static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &tonga_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version tonga_ih_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &tonga_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.h b/drivers/gpu/drm/amd/amdgpu/tonga_ih.h
new file mode 100644
index 000000000..499027eee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __TONGA_IH_H__
+#define __TONGA_IH_H__
+
+extern const struct amdgpu_ip_block_version tonga_ih_ip_block;
+
+#endif /* __TONGA_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
new file mode 100644
index 000000000..e5ebd0842
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
@@ -0,0 +1,2245 @@
+/*
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __TONGA_SDMA_PKT_OPEN_H_
+#define __TONGA_SDMA_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_GEN_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+
+/*define for op field*/
+#define SDMA_PKT_HEADER_op_offset 0
+#define SDMA_PKT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_op_shift 0
+#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_HEADER_sub_op_offset 0
+#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_sub_op_shift 8
+#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst2_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_shift 14
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_ha_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for dst1_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_shift 22
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_mask) << SDMA_PKT_COPY_TILED_DW_3_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_3_height_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_height_shift 16
+#define SDMA_PKT_COPY_TILED_DW_3_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_height_mask) << SDMA_PKT_COPY_TILED_DW_3_height_shift)
+
+/*define for DW_4 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_DW_4_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_slice_pitch_mask) << SDMA_PKT_COPY_TILED_DW_4_slice_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00000FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 11
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_height_shift)
+
+/*define for DW_6 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_slice_pitch_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIT_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_shift 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_BANK_W(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_BANK_H(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_shift 21
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_NUM_BANK(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_pipe_config_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00000FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_ha field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_shift 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_HA(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_ha_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_pitch_in_tile field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_mask) << SDMA_PKT_COPY_T2T_DW_4_src_pitch_in_tile_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_T2T_DW_5_src_slice_pitch_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_pitch_in_tile field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_pitch_in_tile_shift)
+
+/*define for DW_11 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_mask 0x00000FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_PITCH_IN_TILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_pitch_in_tile_shift)
+
+/*define for DW_5 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_slice_pitch_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for struct_ha field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_shift 22
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_HA(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_ha_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for linear_ha field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_shift 30
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_HA(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_ha_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for pitch_in_tile field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_PITCH_IN_TILE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_mask) << SDMA_PKT_WRITE_TILED_DW_3_pitch_in_tile_shift)
+
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_height_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_height_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_3_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_height_mask) << SDMA_PKT_WRITE_TILED_DW_3_height_shift)
+
+/*define for DW_4 word*/
+/*define for slice_pitch field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_SLICE_PITCH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_mask) << SDMA_PKT_WRITE_TILED_DW_4_slice_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00000FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0000FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+#endif /* __TONGA_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
new file mode 100644
index 000000000..0d6b50528
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_0.h"
+#include "amdgpu.h"
+
+static void umc_v6_0_init_registers(struct amdgpu_device *adev)
+{
+ unsigned i,j;
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002);
+}
+
+const struct amdgpu_umc_funcs umc_v6_0_funcs = {
+ .init_registers = umc_v6_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
new file mode 100644
index 000000000..109f1a57a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_0_H__
+#define __UMC_V6_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+extern const struct amdgpu_umc_funcs umc_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
new file mode 100644
index 000000000..f17d297b5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_1.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+
+#include "rsmu/rsmu_0_0_2_offset.h"
+#include "rsmu/rsmu_0_0_2_sh_mask.h"
+#include "umc/umc_6_1_1_offset.h"
+#include "umc/umc_6_1_1_sh_mask.h"
+#include "umc/umc_6_1_2_offset.h"
+
+#define UMC_6_INST_DIST 0x40000
+
+const uint32_t
+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
+ {2, 18, 11, 27}, {4, 20, 13, 29},
+ {1, 17, 8, 24}, {7, 23, 14, 30},
+ {10, 26, 3, 19}, {12, 28, 5, 21},
+ {9, 25, 0, 16}, {15, 31, 6, 22}
+};
+
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
+{
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+ rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 1);
+
+ WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
+}
+
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+ rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 0);
+
+ WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
+}
+
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
+{
+ uint32_t rsmu_umc_addr, rsmu_umc_val;
+
+ rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
+
+ return REG_GET_FIELD(rsmu_umc_val,
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN);
+}
+
+static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
+}
+
+static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ mmUMCCH0_0_EccErrCnt);
+ }
+
+ /* select the lower chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear lower chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear higher chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_1_CE_CNT_INIT);
+}
+
+static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+ uint32_t rsmu_umc_index_state =
+ umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_clear_error_count_per_channel(adev,
+ umc_reg_offset);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_1_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ if ((adev->asic_type == CHIP_ARCTURUS) &&
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ DRM_WARN("Fail to disable DF-Cstate.\n");
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v6_1_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ if ((adev->asic_type == CHIP_ARCTURUS) &&
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ DRM_WARN("Fail to enable DF-Cstate\n");
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+
+ umc_v6_1_clear_error_count(adev);
+}
+
+static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t umc_reg_offset,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint32_t lsb, mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
+ uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+ }
+
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ /* the lowest lsb bits should be ignored */
+ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr &= ~((0x1ULL << lsb) - 1);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+}
+
+static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ if ((adev->asic_type == CHIP_ARCTURUS) &&
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ DRM_WARN("Fail to disable DF-Cstate.\n");
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ ch_inst,
+ umc_inst);
+ }
+
+ if ((adev->asic_type == CHIP_ARCTURUS) &&
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ DRM_WARN("Fail to enable DF-Cstate\n");
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ }
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
+}
+
+static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
+ .query_ras_error_count = umc_v6_1_query_ras_error_count,
+ .query_ras_error_address = umc_v6_1_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_1_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_1_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+}; \ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
new file mode 100644
index 000000000..50c632eb4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_1_H__
+#define __UMC_V6_1_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* HBM Memory Channel Width */
+#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH 128
+/* number of umc channel instance with memory map register access */
+#define UMC_V6_1_CHANNEL_INSTANCE_NUM 4
+/* number of umc instance with memory map register access */
+#define UMC_V6_1_UMC_INSTANCE_NUM 8
+/* total channel instances in one umc block */
+#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
+/* UMC regiser per channel offset */
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20 0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT 0x400
+
+/* EccErrCnt max value */
+#define UMC_V6_1_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_1_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v6_1_ras;
+extern const uint32_t
+ umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
new file mode 100644
index 000000000..530549314
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_7.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+const uint32_t
+ umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
+ {28, 20, 24, 16, 12, 4, 8, 0},
+ {6, 30, 2, 26, 22, 14, 18, 10},
+ {19, 11, 15, 7, 3, 27, 31, 23},
+ {9, 1, 5, 29, 25, 17, 21, 13}
+};
+const uint32_t
+ umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
+ {19, 11, 15, 7, 3, 27, 31, 23},
+ {9, 1, 5, 29, 25, 17, 21, 13},
+ {28, 20, 24, 16, 12, 4, 8, 0},
+ {6, 30, 2, 26, 22, 14, 18, 10},
+};
+
+static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* adjust umc and channel index offset,
+ * the register address is not linear on each umc instace */
+ umc_inst = index / 4;
+ ch_inst = index % 4;
+
+ return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
+}
+
+static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
+ uint64_t mc_umc_status, uint32_t umc_reg_offset)
+{
+ uint32_t mc_umc_addr;
+ uint64_t reg_value;
+
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
+ dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+
+ if (mc_umc_status)
+ dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
+
+ /* print IPID registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print SYND registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print MISC0 registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+}
+
+static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ if (ras->umc_ecc.record_ce_addr_supported) {
+ uint64_t err_addr, soc_pa;
+ uint32_t channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
+}
+
+static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
+}
+
+static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ umc_v6_7_ecc_info_query_correctable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ce_count));
+
+ umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status);
+}
+
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint32_t channel_index;
+ uint64_t soc_pa, retired_page, column;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ /* clear [C4 C3 C2] in soc physical address */
+ soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2] */
+ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+
+ /* shift R14 bit */
+ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
+}
+
+static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr)
+ return 0;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ return 0;
+}
+
+static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_ecc_info_query_error_address, ras_error_status);
+}
+
+static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ {
+ uint64_t err_addr, soc_pa;
+ uint32_t mc_umc_addrt0;
+ uint32_t channel_index;
+
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
+ }
+}
+
+static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
+}
+
+static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ regUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0,
+ regUMCCH0_0_EccErrCnt);
+
+ /* select the lower chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear lower chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_7_CE_CNT_INIT);
+
+ /* select the higher chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_EccErrCntSel,
+ EccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear higher chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V6_7_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_reset_error_count_per_channel, NULL);
+}
+
+static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v6_7_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count),
+ ch_inst, umc_inst);
+
+ umc_v6_7_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_ecc_error_count, ras_error_status);
+
+ umc_v6_7_reset_error_count(adev);
+}
+
+static int umc_v6_7_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t mc_umc_status_addr;
+ uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return 0;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr =
+ REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v6_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+}
+
+static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v6_7_query_error_address, ras_error_status);
+}
+
+static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
+ struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_ctrl_addr, ecc_ctrl;
+
+ ecc_ctrl_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
+ ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
+ umc_reg_offset) * 4);
+
+ return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
+}
+
+static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t umc_reg_offset = 0;
+
+ /* Enabling fatal error in umc instance0 channel0 will be
+ * considered as fatal error mode
+ */
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
+ return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
+ .query_ras_error_count = umc_v6_7_query_ras_error_count,
+ .query_ras_error_address = umc_v6_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v6_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v6_7_ras_hw_ops,
+ },
+ .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
new file mode 100644
index 000000000..105245d5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_7_H__
+#define __UMC_V6_7_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* EccErrCnt max value */
+#define UMC_V6_7_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_7_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V6_7_CE_CNT_INIT (UMC_V6_7_CE_CNT_MAX - UMC_V6_7_CE_INT_THRESHOLD)
+
+#define UMC_V6_7_INST_DIST 0x40000
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V6_7_UMC_INSTANCE_NUM 4
+/* number of umc instance with memory map register access */
+#define UMC_V6_7_CHANNEL_INSTANCE_NUM 8
+/* total channel instances in one umc block */
+#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM)
+/* one piece of normalizing address is mapped to 8 pieces of physical address */
+#define UMC_V6_7_NA_MAP_PA_NUM 8
+/* R14 bit shift should be considered, double the number */
+#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2)
+/* The CH4 bit in SOC physical address */
+#define UMC_V6_7_PA_CH4_BIT 12
+/* The C2 bit in SOC physical address */
+#define UMC_V6_7_PA_C2_BIT 17
+/* The R14 bit in SOC physical address */
+#define UMC_V6_7_PA_R14_BIT 34
+/* UMC regiser per channel offset */
+#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
+
+/* XOR bit 20, 25, 34 of PA into CH4 bit (bit 12 of PA),
+ * hash bit is only effective when related setting is enabled
+ */
+#define CHANNEL_HASH(channel_idx, pa) (((channel_idx) >> 4) ^ \
+ (((pa) >> 20) & 0x1ULL & adev->df.hash_status.hash_64k) ^ \
+ (((pa) >> 25) & 0x1ULL & adev->df.hash_status.hash_2m) ^ \
+ (((pa) >> 34) & 0x1ULL & adev->df.hash_status.hash_1g))
+#define SET_CHANNEL_HASH(channel_idx, pa) do { \
+ (pa) &= ~(0x1ULL << UMC_V6_7_PA_CH4_BIT); \
+ (pa) |= (CHANNEL_HASH(channel_idx, pa) << UMC_V6_7_PA_CH4_BIT); \
+ } while (0)
+
+extern struct amdgpu_umc_ras umc_v6_7_ras;
+extern const uint32_t
+ umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
+extern const uint32_t
+ umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
new file mode 100644
index 000000000..46bfdee79
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_10.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_10_0_offset.h"
+#include "umc/umc_8_10_0_sh_mask.h"
+
+#define UMC_8_NODE_DIST 0x800000
+#define UMC_8_INST_DIST 0x4000
+
+struct channelnum_map_colbit {
+ uint32_t channel_num;
+ uint32_t col_bit;
+};
+
+const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
+ {24, 13},
+ {20, 13},
+ {16, 12},
+ {14, 12},
+ {12, 12},
+ {10, 12},
+ {6, 11},
+};
+
+const uint32_t
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{1, 5}, {7, 3}},
+ {{14, 15}, {13, 12}},
+ {{10, 11}, {9, 8}},
+ {{6, 2}, {0, 4}}
+ };
+
+const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{16, 18}, {17, 19}},
+ {{15, 11}, {3, 7}},
+ {{1, 5}, {13, 9}},
+ {{23, 21}, {22, 20}},
+ {{0, 4}, {12, 8}},
+ {{14, 10}, {2, 6}}
+ };
+
+static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
+ uint32_t node_inst,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst +
+ UMC_8_NODE_DIST * node_inst;
+}
+
+static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_10_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 8_10 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Rely on MCUMC_STATUS for correctable error counter
+ * MCUMC_STATUS is a 64 bit register
+ */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Check the MCUMC_STATUS. */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ umc_v8_10_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_10_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_query_ecc_error_count, ras_error_status);
+
+ umc_v8_10_clear_error_count(adev);
+}
+
+static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
+{
+ uint32_t t = 0;
+
+ for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++)
+ if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num)
+ return umc_v8_10_channelnum_map_colbit_table[t].col_bit;
+
+ /* Failed to get col_bit. */
+ return U32_MAX;
+}
+
+/*
+ * Mapping normal address to soc physical address in swizzle mode.
+ */
+static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
+ uint32_t channel_idx,
+ uint64_t na, uint64_t *soc_pa)
+{
+ uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
+ uint64_t tmp_addr;
+
+ if (col_bit == U32_MAX)
+ return -1;
+
+ tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
+ *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_MID(na, col_bit) |
+ SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_LSB(na);
+
+ return 0;
+}
+
+static void umc_v8_10_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst,
+ uint32_t node_inst, uint64_t mc_umc_status)
+{
+ uint64_t na_err_addr_base;
+ uint64_t na_err_addr, retired_page_addr;
+ uint32_t channel_index, addr_lsb, col = 0;
+ int ret = 0;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst];
+
+ /* the lowest lsb bits should be ignored */
+ addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
+ err_addr &= ~((0x1ULL << addr_lsb) - 1);
+ na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+
+ /* loop for all possibilities of [C6 C5] in normal address. */
+ for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
+ na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
+
+ /* Mapping normal error address to retired soc physical address. */
+ ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
+ na_err_addr, &retired_page_addr);
+ if (ret) {
+ dev_err(adev->dev, "Failed to map pa from umc na.\n");
+ break;
+ }
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
+ retired_page_addr);
+ amdgpu_umc_fill_error_record(err_data, na_err_addr,
+ retired_page_addr, channel_index, umc_inst);
+ }
+}
+
+static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint64_t mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr;
+ uint64_t mc_umc_addrt0;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return 0;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_10_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst, node_inst, mc_umc_status);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+ return 0;
+}
+
+static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_query_error_address, ras_error_status);
+}
+
+static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_err_cnt_init_per_channel, NULL);
+}
+
+static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Force return true, because UMCCH0_0_GeccCtrl
+ * is not accessible from host side
+ */
+ return true;
+}
+
+static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
+ *error_count += 1;
+ }
+}
+
+static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
+ *error_count += 1;
+ }
+}
+
+static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+ umc_v8_10_ecc_info_query_correctable_error_count(adev,
+ node_inst, umc_inst, ch_inst,
+ &(err_data->ce_count));
+ umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
+ node_inst, umc_inst, ch_inst,
+ &(err_data->ue_count));
+ return 0;
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status);
+}
+
+static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t eccinfo_table_idx;
+ uint64_t mc_umc_status, err_addr;
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst;
+
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return 0;
+
+ if (!err_data->err_addr)
+ return 0;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1)) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_10_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst, node_inst, mc_umc_status);
+ }
+
+ return 0;
+}
+
+static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_10_ecc_info_query_error_address, ras_error_status);
+}
+
+static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr)
+{
+ hdr->version = RAS_TABLE_VER_V2_1;
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_10_query_ras_error_count,
+ .query_ras_error_address = umc_v8_10_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v8_10_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_10_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_10_err_cnt_init,
+ .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
+ .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
+ .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
new file mode 100644
index 000000000..dc12e0af5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_10_H__
+#define __UMC_V8_10_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_10_UMC_INSTANCE_NUM 2
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * \
+ (adev)->gmc.num_umc - hweight32((adev)->gmc.m_half_use) * 2)
+
+/* UMC regiser per channel offset */
+#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
+
+/* EccErrCnt max value */
+#define UMC_V8_10_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD)
+
+#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4
+
+/* The C5 bit in NA address */
+#define UMC_V8_10_NA_C5_BIT 14
+
+/* Map to swizzle mode address */
+#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \
+ ((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
+#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \
+ (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
+#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
+#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
+ ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8)
+#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)
+
+extern struct amdgpu_umc_ras umc_v8_10_ras;
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl_ext0[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
new file mode 100644
index 000000000..b717fdaa4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_7.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+
+#include "rsmu/rsmu_0_0_2_offset.h"
+#include "rsmu/rsmu_0_0_2_sh_mask.h"
+#include "umc/umc_8_7_0_offset.h"
+#include "umc/umc_8_7_0_sh_mask.h"
+
+#define UMC_8_INST_DIST 0x40000
+
+const uint32_t
+ umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM] = {
+ {2, 11}, {4, 13},
+ {1, 8}, {7, 14},
+ {10, 3}, {12, 5},
+ {9, 0}, {15, 6}
+};
+
+static inline uint32_t get_umc_v8_7_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs*ch_inst + UMC_8_INST_DIST*umc_inst;
+}
+
+static void umc_v8_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* check for SRAM correctable error
+ * MCUMC_STATUS is a 64 bit register
+ */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v8_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_inst, uint32_t ch_inst,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+
+ /* TODO: driver needs to toggle DF Cstate to ensure
+ * safe access of UMC registers. Will add the protection
+ */
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_v8_7_ecc_info_query_correctable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ce_count));
+ umc_v8_7_ecc_info_querry_uncorrectable_error_count(adev,
+ umc_inst, ch_inst,
+ &(err_data->ue_count));
+ }
+}
+
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ uint64_t retired_page;
+ uint32_t channel_index;
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+}
+
+static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+
+ if (mc_umc_status == 0)
+ return;
+
+ if (!err_data->err_addr)
+ return;
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+}
+
+static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+
+ /* TODO: driver needs to toggle DF Cstate to ensure
+ * safe access of UMC resgisters. Will add the protection
+ * when firmware interface is ready
+ */
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_v8_7_ecc_info_query_error_address(adev,
+ err_data,
+ ch_inst,
+ umc_inst);
+ }
+}
+
+static void umc_v8_7_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);
+
+ /* select the lower chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear lower chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_7_CE_CNT_INIT);
+
+ /* select the higher chip */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
+ umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
+ UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
+ ecc_err_cnt_sel);
+
+ /* clear higher chip error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_7_CE_CNT_INIT);
+}
+
+static void umc_v8_7_clear_error_count(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_7_clear_error_count_per_channel(adev,
+ umc_reg_offset);
+ }
+}
+
+static void umc_v8_7_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 8_7_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 0);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_7_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_7_CE_CNT_INIT);
+
+ /* check for SRAM correctable error
+ MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+static void umc_v8_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* check the MCUMC_STATUS */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_7_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_7_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ umc_v8_7_clear_error_count(adev);
+}
+
+static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t umc_reg_offset,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint32_t lsb, mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return;
+ }
+
+ /* calculate error address if ue error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ /* the lowest lsb bits should be ignored */
+ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr &= ~((0x1ULL << lsb) - 1);
+
+ umc_v8_7_convert_error_address(adev, err_data, err_addr,
+ ch_inst, umc_inst);
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+}
+
+static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_7_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ ch_inst,
+ umc_inst);
+ }
+}
+
+static void umc_v8_7_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);
+
+ /* select the lower chip and check the error count */
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 0);
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_7_CE_CNT_INIT);
+
+ /* select the higher chip and check the err counter */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrCntCsSel, 1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_7_CE_CNT_INIT);
+}
+
+static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
+{
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_7_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_7_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_7_query_ras_error_count,
+ .query_ras_error_address = umc_v8_7_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v8_7_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_7_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_7_err_cnt_init,
+ .ecc_info_query_ras_error_count = umc_v8_7_ecc_info_query_ras_error_count,
+ .ecc_info_query_ras_error_address = umc_v8_7_ecc_info_query_ras_error_address,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
new file mode 100644
index 000000000..dd4993f5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_7_H__
+#define __UMC_V8_7_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* HBM Memory Channel Width */
+#define UMC_V8_7_HBM_MEMORY_CHANNEL_WIDTH 128
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_7_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_7_UMC_INSTANCE_NUM 8
+/* total channel instances in one umc block */
+#define UMC_V8_7_TOTAL_CHANNEL_NUM (UMC_V8_7_CHANNEL_INSTANCE_NUM * UMC_V8_7_UMC_INSTANCE_NUM)
+/* UMC regiser per channel offset */
+#define UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA 0x400
+
+/* EccErrCnt max value */
+#define UMC_V8_7_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V8_7_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_7_ras;
+extern const uint32_t
+ umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM];
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
new file mode 100644
index 000000000..5534c769b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Sonny Jiang <sonny.jiang@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "sid.h"
+
+#include "uvd/uvd_3_1_d.h"
+#include "uvd/uvd_3_1_sh_mask.h"
+
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+/**
+ * uvd_v3_1_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v3_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v3_1_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v3_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v3_1_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v3_1_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: iob associated with the indirect buffer
+ * @ib: indirect buffer to execute
+ * @flags: flags associated with the indirect buffer
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
+ amdgpu_ring_write(ring, ib->gpu_addr);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * uvd_v3_1_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v3_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v3_1_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v3_1_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static void uvd_v3_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static const struct amdgpu_ring_funcs uvd_v3_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v3_1_ring_get_rptr,
+ .get_wptr = uvd_v3_1_ring_get_wptr,
+ .set_wptr = uvd_v3_1_ring_set_wptr,
+ .parse_cs = amdgpu_uvd_ring_parse_cs,
+ .emit_frame_size =
+ 14, /* uvd_v3_1_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* uvd_v3_1_ring_emit_ib */
+ .emit_ib = uvd_v3_1_ring_emit_ib,
+ .emit_fence = uvd_v3_1_ring_emit_fence,
+ .test_ring = uvd_v3_1_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v3_1_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+};
+
+static void uvd_v3_1_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->ring.funcs = &uvd_v3_1_ring_funcs;
+}
+
+static void uvd_v3_1_set_dcm(struct amdgpu_device *adev,
+ bool sw_mode)
+{
+ u32 tmp, tmp2;
+
+ WREG32_FIELD(UVD_CGC_GATE, REGS, 0);
+
+ tmp = RREG32(mmUVD_CGC_CTRL);
+ tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+ tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+ (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+
+ if (sw_mode) {
+ tmp &= ~0x7ffff800;
+ tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK |
+ UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK |
+ (7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT);
+ } else {
+ tmp |= 0x7ffff800;
+ tmp2 = 0;
+ }
+
+ WREG32(mmUVD_CGC_CTRL, tmp);
+ WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
+}
+
+/**
+ * uvd_v3_1_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
+{
+ uint64_t addr;
+ uint32_t size;
+
+ /* programm the VCPU memory controller bits 0-27 */
+ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+ size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
+
+ addr += size;
+ size = AMDGPU_UVD_HEAP_SIZE >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
+
+ addr += size;
+ size = (AMDGPU_UVD_STACK_SIZE +
+ (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ /* bits 28-31 */
+ addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
+ WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+ /* bits 32-39 */
+ addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
+ WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+/**
+ * uvd_v3_1_fw_validate - FW validation operation
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialate and check UVD validation.
+ */
+static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t keysel = adev->uvd.keyselect;
+
+ WREG32(mmUVD_FW_START, keysel);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__DONE_MASK)
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__PASS_MASK))
+ return -EINVAL;
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__BUSY_MASK))
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/**
+ * uvd_v3_1_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v3_1_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t rb_bufsz;
+ int i, j, r;
+ u32 tmp;
+ /* disable byte swapping */
+ u32 lmi_swap_cntl = 0;
+ u32 mp_swap_cntl = 0;
+
+ /* set uvd busy */
+ WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2));
+
+ uvd_v3_1_set_dcm(adev, true);
+ WREG32(mmUVD_CGC_GATE, 0);
+
+ /* take UVD block out of reset */
+ WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 1 << 9);
+
+ /* disable interrupt */
+ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+ /* initialize UVD memory controller */
+ WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+ (1 << 21) | (1 << 9) | (1 << 20));
+
+ tmp = RREG32(mmUVD_MPC_CNTL);
+ WREG32(mmUVD_MPC_CNTL, tmp | 0x10);
+
+ WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32(mmUVD_MPC_SET_ALU, 0);
+ WREG32(mmUVD_MPC_SET_MUX, 0x88);
+
+ tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL);
+ WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10));
+
+ /* enable UMC */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable interrupt */
+ WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+ WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
+
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Set the write pointer delay */
+ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* programm the 4GB memory segment for rptr and ring buffer */
+ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+ (0x7 << 16) | (0x1 << 31));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmUVD_RBC_RB_RPTR, 0x0);
+
+ ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+
+ /* set the ring address */
+ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr);
+
+ /* Set ring buffer size */
+ rb_bufsz = order_base_2(ring->ring_size);
+ rb_bufsz = (0x1 << 8) | rb_bufsz;
+ WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+
+ return 0;
+}
+
+/**
+ * uvd_v3_1_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v3_1_stop(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t status;
+
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(1);
+ }
+ if (status & 2)
+ break;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0xf)
+ break;
+ mdelay(1);
+ }
+ if (status & 0xf)
+ break;
+ }
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+ if (status & 0x240)
+ break;
+ }
+
+ WREG32_P(0x3D49, 0, ~(1 << 2));
+
+ WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9));
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32(mmUVD_STATUS, 0);
+
+ uvd_v3_1_set_dcm(adev, false);
+}
+
+static int uvd_v3_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int uvd_v3_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: UVD TRAP\n");
+ amdgpu_fence_process(&adev->uvd.inst->ring);
+ return 0;
+}
+
+
+static const struct amdgpu_irq_src_funcs uvd_v3_1_irq_funcs = {
+ .set = uvd_v3_1_set_interrupt_state,
+ .process = uvd_v3_1_process_interrupt,
+};
+
+static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v3_1_irq_funcs;
+}
+
+
+static int uvd_v3_1_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
+
+ uvd_v3_1_set_ring_funcs(adev);
+ uvd_v3_1_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v3_1_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+ void *ptr;
+ uint32_t ucode_len;
+
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ ring = &adev->uvd.inst->ring;
+ sprintf(ring->name, "uvd");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ /* Retrieval firmware validate key */
+ ptr = adev->uvd.inst[0].cpu_addr;
+ ptr += 192 + 16;
+ memcpy(&ucode_len, ptr, 4);
+ ptr += ucode_len;
+ memcpy(&adev->uvd.keyselect, ptr, 4);
+
+ r = amdgpu_uvd_entity_init(adev);
+
+ return r;
+}
+
+static int uvd_v3_1_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0x3fff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0x3fff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
+/**
+ * uvd_v3_1_hw_init - start and test UVD block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v3_1_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t tmp;
+ int r;
+
+ uvd_v3_1_mc_resume(adev);
+
+ r = uvd_v3_1_fw_validate(adev);
+ if (r) {
+ DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r);
+ return r;
+ }
+
+ uvd_v3_1_enable_mgcg(adev, true);
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+
+ uvd_v3_1_start(adev);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r) {
+ DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r);
+ goto done;
+ }
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ goto done;
+ }
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+
+done:
+ if (!r)
+ DRM_INFO("UVD initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * uvd_v3_1_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v3_1_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (RREG32(mmUVD_STATUS) != 0)
+ uvd_v3_1_stop(adev);
+
+ return 0;
+}
+
+static int uvd_v3_1_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v3_1_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v3_1_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v3_1_hw_init(adev);
+}
+
+static bool uvd_v3_1_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v3_1_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+static int uvd_v3_1_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ uvd_v3_1_stop(adev);
+
+ WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK,
+ ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ return uvd_v3_1_start(adev);
+}
+
+static int uvd_v3_1_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int uvd_v3_1_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
+ .name = "uvd_v3_1",
+ .early_init = uvd_v3_1_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v3_1_sw_init,
+ .sw_fini = uvd_v3_1_sw_fini,
+ .hw_init = uvd_v3_1_hw_init,
+ .hw_fini = uvd_v3_1_hw_fini,
+ .suspend = uvd_v3_1_suspend,
+ .resume = uvd_v3_1_resume,
+ .is_idle = uvd_v3_1_is_idle,
+ .wait_for_idle = uvd_v3_1_wait_for_idle,
+ .soft_reset = uvd_v3_1_soft_reset,
+ .set_clockgating_state = uvd_v3_1_set_clockgating_state,
+ .set_powergating_state = uvd_v3_1_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &uvd_v3_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h
new file mode 100644
index 000000000..8c2f9b207
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V3_1_H__
+#define __UVD_V3_1_H__
+
+extern const struct amdgpu_ip_block_version uvd_v3_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
new file mode 100644
index 000000000..c108b8381
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "cikd.h"
+
+#include "uvd/uvd_4_2_d.h"
+#include "uvd/uvd_4_2_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#include "bif/bif_4_1_d.h"
+
+#include "smu/smu_7_0_1_d.h"
+#include "smu/smu_7_0_1_sh_mask.h"
+
+static void uvd_v4_2_mc_resume(struct amdgpu_device *adev);
+static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
+static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
+static int uvd_v4_2_start(struct amdgpu_device *adev);
+static void uvd_v4_2_stop(struct amdgpu_device *adev);
+static int uvd_v4_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
+ bool sw_mode);
+/**
+ * uvd_v4_2_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v4_2_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v4_2_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+static int uvd_v4_2_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
+
+ uvd_v4_2_set_ring_funcs(adev);
+ uvd_v4_2_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v4_2_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ ring = &adev->uvd.inst->ring;
+ sprintf(ring->name, "uvd");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
+
+ return r;
+}
+
+static int uvd_v4_2_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
+ bool enable);
+/**
+ * uvd_v4_2_hw_init - start and test UVD block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v4_2_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t tmp;
+ int r;
+
+ uvd_v4_2_enable_mgcg(adev, true);
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ goto done;
+ }
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+
+done:
+ if (!r)
+ DRM_INFO("UVD initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * uvd_v4_2_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v4_2_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (RREG32(mmUVD_STATUS) != 0)
+ uvd_v4_2_stop(adev);
+
+ return 0;
+}
+
+static int uvd_v4_2_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v4_2_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v4_2_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v4_2_hw_init(adev);
+}
+
+/**
+ * uvd_v4_2_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v4_2_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t rb_bufsz;
+ int i, j, r;
+ u32 tmp;
+ /* disable byte swapping */
+ u32 lmi_swap_cntl = 0;
+ u32 mp_swap_cntl = 0;
+
+ /* set uvd busy */
+ WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2));
+
+ uvd_v4_2_set_dcm(adev, true);
+ WREG32(mmUVD_CGC_GATE, 0);
+
+ /* take UVD block out of reset */
+ WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 1 << 9);
+
+ /* disable interupt */
+ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+ /* initialize UVD memory controller */
+ WREG32(mmUVD_LMI_CTRL, 0x203108);
+
+ tmp = RREG32(mmUVD_MPC_CNTL);
+ WREG32(mmUVD_MPC_CNTL, tmp | 0x10);
+
+ WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32(mmUVD_MPC_SET_ALU, 0);
+ WREG32(mmUVD_MPC_SET_MUX, 0x88);
+
+ uvd_v4_2_mc_resume(adev);
+
+ tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL);
+ WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10));
+
+ /* enable UMC */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable interupt */
+ WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+ WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
+
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Set the write pointer delay */
+ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* program the 4GB memory segment for rptr and ring buffer */
+ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+ (0x7 << 16) | (0x1 << 31));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmUVD_RBC_RB_RPTR, 0x0);
+
+ ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+
+ /* set the ring address */
+ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr);
+
+ /* Set ring buffer size */
+ rb_bufsz = order_base_2(ring->ring_size);
+ rb_bufsz = (0x1 << 8) | rb_bufsz;
+ WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+
+ return 0;
+}
+
+/**
+ * uvd_v4_2_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v4_2_stop(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t status;
+
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(1);
+ }
+ if (status & 2)
+ break;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0xf)
+ break;
+ mdelay(1);
+ }
+ if (status & 0xf)
+ break;
+ }
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+ if (status & 0x240)
+ break;
+ }
+
+ WREG32_P(0x3D49, 0, ~(1 << 2));
+
+ WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9));
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32(mmUVD_STATUS, 0);
+
+ uvd_v4_2_set_dcm(adev, false);
+}
+
+/**
+ * uvd_v4_2_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v4_2_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v4_2_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: iob associated with the indirect buffer
+ * @ib: indirect buffer to execute
+ * @flags: flags associated with the indirect buffer
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
+ amdgpu_ring_write(ring, ib->gpu_addr);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+/**
+ * uvd_v4_2_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
+{
+ uint64_t addr;
+ uint32_t size;
+
+ /* program the VCPU memory controller bits 0-27 */
+ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+ size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
+
+ addr += size;
+ size = AMDGPU_UVD_HEAP_SIZE >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
+
+ addr += size;
+ size = (AMDGPU_UVD_STACK_SIZE +
+ (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ /* bits 28-31 */
+ addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
+ WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+ /* bits 32-39 */
+ addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
+ WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
+static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
+ bool sw_mode)
+{
+ u32 tmp, tmp2;
+
+ WREG32_FIELD(UVD_CGC_GATE, REGS, 0);
+
+ tmp = RREG32(mmUVD_CGC_CTRL);
+ tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+ tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+ (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+
+ if (sw_mode) {
+ tmp &= ~0x7ffff800;
+ tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK |
+ UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK |
+ (7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT);
+ } else {
+ tmp |= 0x7ffff800;
+ tmp2 = 0;
+ }
+
+ WREG32(mmUVD_CGC_CTRL, tmp);
+ WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
+}
+
+static bool uvd_v4_2_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v4_2_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+static int uvd_v4_2_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ uvd_v4_2_stop(adev);
+
+ WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK,
+ ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ return uvd_v4_2_start(adev);
+}
+
+static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: UVD TRAP\n");
+ amdgpu_fence_process(&adev->uvd.inst->ring);
+ return 0;
+}
+
+static int uvd_v4_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int uvd_v4_2_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the UVD block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE) {
+ uvd_v4_2_stop(adev);
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
+ if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
+ CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
+ WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
+ UVD_PGFSM_CONFIG__UVD_PGFSM_POWER_DOWN_MASK |
+ UVD_PGFSM_CONFIG__UVD_PGFSM_P1_SELECT_MASK));
+ mdelay(20);
+ }
+ }
+ return 0;
+ } else {
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
+ if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+ CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
+ WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
+ UVD_PGFSM_CONFIG__UVD_PGFSM_POWER_UP_MASK |
+ UVD_PGFSM_CONFIG__UVD_PGFSM_P1_SELECT_MASK));
+ mdelay(30);
+ }
+ }
+ return uvd_v4_2_start(adev);
+ }
+}
+
+static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
+ .name = "uvd_v4_2",
+ .early_init = uvd_v4_2_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v4_2_sw_init,
+ .sw_fini = uvd_v4_2_sw_fini,
+ .hw_init = uvd_v4_2_hw_init,
+ .hw_fini = uvd_v4_2_hw_fini,
+ .suspend = uvd_v4_2_suspend,
+ .resume = uvd_v4_2_resume,
+ .is_idle = uvd_v4_2_is_idle,
+ .wait_for_idle = uvd_v4_2_wait_for_idle,
+ .soft_reset = uvd_v4_2_soft_reset,
+ .set_clockgating_state = uvd_v4_2_set_clockgating_state,
+ .set_powergating_state = uvd_v4_2_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v4_2_ring_get_rptr,
+ .get_wptr = uvd_v4_2_ring_get_wptr,
+ .set_wptr = uvd_v4_2_ring_set_wptr,
+ .parse_cs = amdgpu_uvd_ring_parse_cs,
+ .emit_frame_size =
+ 14, /* uvd_v4_2_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* uvd_v4_2_ring_emit_ib */
+ .emit_ib = uvd_v4_2_ring_emit_ib,
+ .emit_fence = uvd_v4_2_ring_emit_fence,
+ .test_ring = uvd_v4_2_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v4_2_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+};
+
+static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
+ .set = uvd_v4_2_set_interrupt_state,
+ .process = uvd_v4_2_process_interrupt,
+};
+
+static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version uvd_v4_2_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 4,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &uvd_v4_2_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.h b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.h
new file mode 100644
index 000000000..8a0444bb8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V4_2_H__
+#define __UVD_V4_2_H__
+
+extern const struct amdgpu_ip_block_version uvd_v4_2_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
new file mode 100644
index 000000000..d7e31e48a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -0,0 +1,921 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "vid.h"
+#include "uvd/uvd_5_0_d.h"
+#include "uvd/uvd_5_0_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "bif/bif_5_0_d.h"
+#include "vi.h"
+#include "smu/smu_7_1_2_d.h"
+#include "smu/smu_7_1_2_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
+static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+static int uvd_v5_0_start(struct amdgpu_device *adev);
+static void uvd_v5_0_stop(struct amdgpu_device *adev);
+static int uvd_v5_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable);
+/**
+ * uvd_v5_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v5_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v5_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+static int uvd_v5_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
+
+ uvd_v5_0_set_ring_funcs(adev);
+ uvd_v5_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v5_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ ring = &adev->uvd.inst->ring;
+ sprintf(ring->name, "uvd");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
+
+ return r;
+}
+
+static int uvd_v5_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+/**
+ * uvd_v5_0_hw_init - start and test UVD block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v5_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t tmp;
+ int r;
+
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+ uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_enable_mgcg(adev, true);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ goto done;
+ }
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+
+done:
+ if (!r)
+ DRM_INFO("UVD initialized successfully.\n");
+
+ return r;
+
+}
+
+/**
+ * uvd_v5_0_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v5_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (RREG32(mmUVD_STATUS) != 0)
+ uvd_v5_0_stop(adev);
+
+ return 0;
+}
+
+static int uvd_v5_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v5_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v5_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v5_0_hw_init(adev);
+}
+
+/**
+ * uvd_v5_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint64_t offset;
+ uint32_t size;
+
+ /* program memory controller bits 0-27 */
+ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst->gpu_addr));
+ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst->gpu_addr));
+
+ offset = AMDGPU_UVD_FIRMWARE_OFFSET;
+ size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
+ WREG32(mmUVD_VCPU_CACHE_OFFSET0, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = AMDGPU_UVD_HEAP_SIZE;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = AMDGPU_UVD_STACK_SIZE +
+ (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles);
+ WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+/**
+ * uvd_v5_0_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v5_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ uint32_t mp_swap_cntl;
+ int i, j, r;
+
+ /*disable DPG */
+ WREG32_P(mmUVD_POWER_STATUS, 0, ~(1 << 2));
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+ mp_swap_cntl = 0;
+
+ uvd_v5_0_mc_resume(adev);
+
+ /* disable interupt */
+ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+
+ /* stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+ mdelay(1);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* take UVD block out of reset */
+ WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+ (1 << 21) | (1 << 9) | (1 << 20));
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+ WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32(mmUVD_MPC_SET_ALU, 0);
+ WREG32(mmUVD_MPC_SET_MUX, 0x88);
+
+ /* take all subblocks out of reset, except VCPU */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 1 << 9);
+
+ /* enable UMC */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ /* boot up the VCPU */
+ WREG32(mmUVD_SOFT_RESET, 0);
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(mmUVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+
+ /* clear the bit 4 of UVD_STATUS */
+ WREG32_P(mmUVD_STATUS, 0, ~(2 << 1));
+
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = 0;
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32(mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+
+ WREG32_P(mmUVD_RBC_RB_CNTL, 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ return 0;
+}
+
+/**
+ * uvd_v5_0_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v5_0_stop(struct amdgpu_device *adev)
+{
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+ mdelay(1);
+
+ /* put VCPU into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* disable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 0x0);
+
+ /* Unstall UMC and register bus */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ WREG32(mmUVD_STATUS, 0);
+}
+
+/**
+ * uvd_v5_0_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v5_0_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v5_0_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH, 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool uvd_v5_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v5_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+static int uvd_v5_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ uvd_v5_0_stop(adev);
+
+ WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK,
+ ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ return uvd_v5_0_start(adev);
+}
+
+static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: UVD TRAP\n");
+ amdgpu_fence_process(&adev->uvd.inst->ring);
+ return 0;
+}
+
+static void uvd_v5_0_enable_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t data1, data3, suvd_flags;
+
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+ data3 = RREG32(mmUVD_CGC_GATE);
+
+ suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK;
+
+ if (enable) {
+ data3 |= (UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__SCPU_MASK);
+ /* only in pg enabled, we can gate clock to vcpu*/
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD)
+ data3 |= UVD_CGC_GATE__VCPU_MASK;
+ data3 &= ~UVD_CGC_GATE__REGS_MASK;
+ data1 |= suvd_flags;
+ } else {
+ data3 = 0;
+ data1 = 0;
+ }
+
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+ WREG32(mmUVD_CGC_GATE, data3);
+}
+
+static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data2;
+
+ data = RREG32(mmUVD_CGC_CTRL);
+ data2 = RREG32(mmUVD_SUVD_CGC_CTRL);
+
+
+ data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
+ UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+
+
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
+ (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
+
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__JPEG_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
+
+ WREG32(mmUVD_CGC_CTRL, data);
+ WREG32(mmUVD_SUVD_CGC_CTRL, data2);
+}
+
+#if 0
+static void uvd_v5_0_set_hw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data1, cgc_flags, suvd_flags;
+
+ data = RREG32(mmUVD_CGC_GATE);
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+
+ cgc_flags = UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__SCPU_MASK;
+
+ suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK;
+
+ data |= cgc_flags;
+ data1 |= suvd_flags;
+
+ WREG32(mmUVD_CGC_GATE, data);
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+}
+#endif
+
+static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
+static int uvd_v5_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (uvd_v5_0_wait_for_idle(handle))
+ return -EBUSY;
+ uvd_v5_0_enable_clock_gating(adev, true);
+
+ /* enable HW gates because UVD is idle */
+/* uvd_v5_0_set_hw_clock_gating(adev); */
+ } else {
+ uvd_v5_0_enable_clock_gating(adev, false);
+ }
+
+ uvd_v5_0_set_sw_clock_gating(adev);
+ return 0;
+}
+
+static int uvd_v5_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the UVD block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ if (state == AMD_PG_STATE_GATE) {
+ uvd_v5_0_stop(adev);
+ } else {
+ ret = uvd_v5_0_start(adev);
+ if (ret)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ mutex_lock(&adev->pm.mutex);
+
+ if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+ CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
+ DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
+ goto out;
+ }
+
+ /* AMD_CG_SUPPORT_UVD_MGCG */
+ data = RREG32(mmUVD_CGC_CTRL);
+ if (data & UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK)
+ *flags |= AMD_CG_SUPPORT_UVD_MGCG;
+
+out:
+ mutex_unlock(&adev->pm.mutex);
+}
+
+static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
+ .name = "uvd_v5_0",
+ .early_init = uvd_v5_0_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v5_0_sw_init,
+ .sw_fini = uvd_v5_0_sw_fini,
+ .hw_init = uvd_v5_0_hw_init,
+ .hw_fini = uvd_v5_0_hw_fini,
+ .suspend = uvd_v5_0_suspend,
+ .resume = uvd_v5_0_resume,
+ .is_idle = uvd_v5_0_is_idle,
+ .wait_for_idle = uvd_v5_0_wait_for_idle,
+ .soft_reset = uvd_v5_0_soft_reset,
+ .set_clockgating_state = uvd_v5_0_set_clockgating_state,
+ .set_powergating_state = uvd_v5_0_set_powergating_state,
+ .get_clockgating_state = uvd_v5_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v5_0_ring_get_rptr,
+ .get_wptr = uvd_v5_0_ring_get_wptr,
+ .set_wptr = uvd_v5_0_ring_set_wptr,
+ .parse_cs = amdgpu_uvd_ring_parse_cs,
+ .emit_frame_size =
+ 14, /* uvd_v5_0_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 6, /* uvd_v5_0_ring_emit_ib */
+ .emit_ib = uvd_v5_0_ring_emit_ib,
+ .emit_fence = uvd_v5_0_ring_emit_fence,
+ .test_ring = uvd_v5_0_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v5_0_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+};
+
+static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
+ .set = uvd_v5_0_set_interrupt_state,
+ .process = uvd_v5_0_process_interrupt,
+};
+
+static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version uvd_v5_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &uvd_v5_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.h b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.h
new file mode 100644
index 000000000..2eaaea793
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V5_0_H__
+#define __UVD_V5_0_H__
+
+extern const struct amdgpu_ip_block_version uvd_v5_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
new file mode 100644
index 000000000..5fe872f4b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -0,0 +1,1687 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "vid.h"
+#include "uvd/uvd_6_0_d.h"
+#include "uvd/uvd_6_0_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "smu/smu_7_1_3_d.h"
+#include "smu/smu_7_1_3_sh_mask.h"
+#include "bif/bif_5_1_d.h"
+#include "gmc/gmc_8_1_d.h"
+#include "vi.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+/* Polaris10/11/12 firmware version */
+#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8))
+
+static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+
+static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int uvd_v6_0_start(struct amdgpu_device *adev);
+static void uvd_v6_0_stop(struct amdgpu_device *adev);
+static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
+static int uvd_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable);
+
+/**
+* uvd_v6_0_enc_support - get encode support status
+*
+* @adev: amdgpu_device pointer
+*
+* Returns the current hardware encode support status
+*/
+static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
+{
+ return ((adev->asic_type >= CHIP_POLARIS10) &&
+ (adev->asic_type <= CHIP_VEGAM) &&
+ (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
+}
+
+/**
+ * uvd_v6_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v6_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->uvd.inst->ring_enc[0])
+ return RREG32(mmUVD_RB_RPTR);
+ else
+ return RREG32(mmUVD_RB_RPTR2);
+}
+/**
+ * uvd_v6_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v6_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->uvd.inst->ring_enc[0])
+ return RREG32(mmUVD_RB_WPTR);
+ else
+ return RREG32(mmUVD_RB_WPTR2);
+}
+
+/**
+ * uvd_v6_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v6_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->uvd.inst->ring_enc[0])
+ WREG32(mmUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ else
+ WREG32(mmUVD_RB_WPTR2,
+ lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v6_0_enc_ring_test_ring - test if UVD ENC ring is working
+ *
+ * @ring: the engine to test on
+ *
+ */
+static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned i;
+ int r;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v6_0_enc_get_create_msg - generate a UVD ENC create msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
+ * @fence: optional fence to return
+ *
+ * Open up a stream for HW test
+ */
+static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
+{
+ const unsigned ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = 0x00010000;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
+ * @fence: optional fence to return
+ *
+ * Close up a stream for HW test or if userspace failed to do so
+ */
+static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
+ uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
+{
+ const unsigned ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = 0x00010000;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * uvd_v6_0_enc_ring_test_ib - test if UVD ENC IBs are working
+ *
+ * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ */
+static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
+ long r;
+
+ r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
+ if (r)
+ goto error;
+
+ r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ dma_fence_put(fence);
+ return r;
+}
+
+static int uvd_v6_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK))
+ return -ENOENT;
+
+ uvd_v6_0_set_ring_funcs(adev);
+
+ if (uvd_v6_0_enc_support(adev)) {
+ adev->uvd.num_enc_rings = 2;
+ uvd_v6_0_set_enc_ring_funcs(adev);
+ }
+
+ uvd_v6_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v6_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+
+ /* UVD ENC TRAP */
+ if (uvd_v6_0_enc_support(adev)) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+ }
+ }
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ if (!uvd_v6_0_enc_support(adev)) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ adev->uvd.inst->ring_enc[i].funcs = NULL;
+
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.num_enc_rings = 0;
+
+ DRM_INFO("UVD ENC is disabled\n");
+ }
+
+ ring = &adev->uvd.inst->ring;
+ sprintf(ring->name, "uvd");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ if (uvd_v6_0_enc_support(adev)) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst->ring_enc[i];
+ sprintf(ring->name, "uvd_enc%d", i);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ r = amdgpu_uvd_entity_init(adev);
+
+ return r;
+}
+
+static int uvd_v6_0_sw_fini(void *handle)
+{
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ if (uvd_v6_0_enc_support(adev)) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
+ }
+
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+/**
+ * uvd_v6_0_hw_init - start and test UVD block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v6_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t tmp;
+ int i, r;
+
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+ uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_enable_mgcg(adev, true);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ goto done;
+ }
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+
+ if (uvd_v6_0_enc_support(adev)) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst->ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+
+done:
+ if (!r) {
+ if (uvd_v6_0_enc_support(adev))
+ DRM_INFO("UVD and UVD ENC initialized successfully.\n");
+ else
+ DRM_INFO("UVD initialized successfully.\n");
+ }
+
+ return r;
+}
+
+/**
+ * uvd_v6_0_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v6_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (RREG32(mmUVD_STATUS) != 0)
+ uvd_v6_0_stop(adev);
+
+ return 0;
+}
+
+static int uvd_v6_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v6_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v6_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v6_0_hw_init(adev);
+}
+
+/**
+ * uvd_v6_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint64_t offset;
+ uint32_t size;
+
+ /* program memory controller bits 0-27 */
+ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst->gpu_addr));
+ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst->gpu_addr));
+
+ offset = AMDGPU_UVD_FIRMWARE_OFFSET;
+ size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
+ WREG32(mmUVD_VCPU_CACHE_OFFSET0, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = AMDGPU_UVD_HEAP_SIZE;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = AMDGPU_UVD_STACK_SIZE +
+ (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles);
+ WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
+ WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
+ WREG32(mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+}
+
+#if 0
+static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, data1;
+
+ data = RREG32(mmUVD_CGC_GATE);
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+ if (enable) {
+ data |= UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__SCPU_MASK;
+ data1 |= UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK |
+ UVD_SUVD_CGC_GATE__SRE_H264_MASK |
+ UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SIT_H264_MASK |
+ UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SCM_H264_MASK |
+ UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SDB_H264_MASK |
+ UVD_SUVD_CGC_GATE__SDB_HEVC_MASK;
+ } else {
+ data &= ~(UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__LMI_UMC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__SCPU_MASK);
+ data1 &= ~(UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK |
+ UVD_SUVD_CGC_GATE__SRE_H264_MASK |
+ UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SIT_H264_MASK |
+ UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SCM_H264_MASK |
+ UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SDB_H264_MASK |
+ UVD_SUVD_CGC_GATE__SDB_HEVC_MASK);
+ }
+ WREG32(mmUVD_CGC_GATE, data);
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+}
+#endif
+
+/**
+ * uvd_v6_0_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v6_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ uint32_t mp_swap_cntl;
+ int i, j, r;
+
+ /* disable DPG */
+ WREG32_P(mmUVD_POWER_STATUS, 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+ mp_swap_cntl = 0;
+
+ uvd_v6_0_mc_resume(adev);
+
+ /* disable interupt */
+ WREG32_FIELD(UVD_MASTINT_EN, VCPU_EN, 0);
+
+ /* stall UMC and register bus before resetting VCPU */
+ WREG32_FIELD(UVD_LMI_CTRL2, STALL_ARB_UMC, 1);
+ mdelay(1);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* take UVD block out of reset */
+ WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_UVD, 0);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32(mmUVD_LMI_CTRL,
+ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__DISABLE_ON_FWV_FAIL_MASK);
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+ WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32(mmUVD_MPC_SET_ALU, 0);
+ WREG32(mmUVD_MPC_SET_MUX, 0x88);
+
+ /* take all subblocks out of reset, except VCPU */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable UMC */
+ WREG32_FIELD(UVD_LMI_CTRL2, STALL_ARB_UMC, 0);
+
+ /* boot up the VCPU */
+ WREG32(mmUVD_SOFT_RESET, 0);
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_FIELD(UVD_SOFT_RESET, VCPU_SOFT_RESET, 1);
+ mdelay(10);
+ WREG32_FIELD(UVD_SOFT_RESET, VCPU_SOFT_RESET, 0);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(mmUVD_MASTINT_EN,
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+ /* clear the bit 4 of UVD_STATUS */
+ WREG32_P(mmUVD_STATUS, 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32(mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32(mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+
+ WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0);
+
+ if (uvd_v6_0_enc_support(adev)) {
+ ring = &adev->uvd.inst->ring_enc[0];
+ WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->uvd.inst->ring_enc[1];
+ WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32(mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
+
+ return 0;
+}
+
+/**
+ * uvd_v6_0_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v6_0_stop(struct amdgpu_device *adev)
+{
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+ mdelay(1);
+
+ /* put VCPU into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* disable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 0x0);
+
+ /* Unstall UMC and register bus */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ WREG32(mmUVD_STATUS, 0);
+}
+
+/**
+ * uvd_v6_0_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v6_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write enc a fence and a trap command to the ring.
+ */
+static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
+}
+
+/**
+ * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
+ * uvd_v6_0_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v6_0_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH, 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * uvd_v6_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrive vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void uvd_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0x8);
+}
+
+static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+ amdgpu_ring_write(ring, 1 << vmid); /* mask */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0xC);
+}
+
+static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH9, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0xE);
+}
+
+static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_WAIT_GE);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+}
+
+static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
+}
+
+static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, pd_addr >> 12);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_FLUSH_TLB);
+ amdgpu_ring_write(ring, vmid);
+}
+
+static bool uvd_v6_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v6_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (uvd_v6_0_is_idle(handle))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
+static bool uvd_v6_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
+ REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
+ (RREG32(mmUVD_STATUS) & AMDGPU_UVD_STATUS_BUSY_MASK))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
+
+ if (srbm_soft_reset) {
+ adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->uvd.inst->srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int uvd_v6_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->uvd.inst->srbm_soft_reset)
+ return 0;
+
+ uvd_v6_0_stop(adev);
+ return 0;
+}
+
+static int uvd_v6_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->uvd.inst->srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int uvd_v6_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->uvd.inst->srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return uvd_v6_0_start(adev);
+}
+
+static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ bool int_handled = true;
+ DRM_DEBUG("IH: UVD TRAP\n");
+
+ switch (entry->src_id) {
+ case 124:
+ amdgpu_fence_process(&adev->uvd.inst->ring);
+ break;
+ case 119:
+ if (likely(uvd_v6_0_enc_support(adev)))
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
+ else
+ int_handled = false;
+ break;
+ case 120:
+ if (likely(uvd_v6_0_enc_support(adev)))
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
+ else
+ int_handled = false;
+ break;
+ }
+
+ if (!int_handled)
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+
+ return 0;
+}
+
+static void uvd_v6_0_enable_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t data1, data3;
+
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+ data3 = RREG32(mmUVD_CGC_GATE);
+
+ data1 |= UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK |
+ UVD_SUVD_CGC_GATE__SRE_H264_MASK |
+ UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SIT_H264_MASK |
+ UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SCM_H264_MASK |
+ UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SDB_H264_MASK |
+ UVD_SUVD_CGC_GATE__SDB_HEVC_MASK;
+
+ if (enable) {
+ data3 |= (UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__LMI_UMC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__SCPU_MASK |
+ UVD_CGC_GATE__JPEG2_MASK);
+ /* only in pg enabled, we can gate clock to vcpu*/
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD)
+ data3 |= UVD_CGC_GATE__VCPU_MASK;
+
+ data3 &= ~UVD_CGC_GATE__REGS_MASK;
+ } else {
+ data3 = 0;
+ }
+
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+ WREG32(mmUVD_CGC_GATE, data3);
+}
+
+static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data2;
+
+ data = RREG32(mmUVD_CGC_CTRL);
+ data2 = RREG32(mmUVD_SUVD_CGC_CTRL);
+
+
+ data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
+ UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+
+
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
+ (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
+
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__JPEG_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK |
+ UVD_CGC_CTRL__JPEG2_MODE_MASK);
+ data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
+
+ WREG32(mmUVD_CGC_CTRL, data);
+ WREG32(mmUVD_SUVD_CGC_CTRL, data2);
+}
+
+#if 0
+static void uvd_v6_0_set_hw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data1, cgc_flags, suvd_flags;
+
+ data = RREG32(mmUVD_CGC_GATE);
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+
+ cgc_flags = UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__SCPU_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__JPEG2_MASK;
+
+ suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK;
+
+ data |= cgc_flags;
+ data1 |= suvd_flags;
+
+ WREG32(mmUVD_CGC_GATE, data);
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+}
+#endif
+
+static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
+static int uvd_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (uvd_v6_0_wait_for_idle(handle))
+ return -EBUSY;
+ uvd_v6_0_enable_clock_gating(adev, true);
+ /* enable HW gates because UVD is idle */
+/* uvd_v6_0_set_hw_clock_gating(adev); */
+ } else {
+ /* disable HW gating and enable Sw gating */
+ uvd_v6_0_enable_clock_gating(adev, false);
+ }
+ uvd_v6_0_set_sw_clock_gating(adev);
+ return 0;
+}
+
+static int uvd_v6_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the UVD block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+
+ if (state == AMD_PG_STATE_GATE) {
+ uvd_v6_0_stop(adev);
+ } else {
+ ret = uvd_v6_0_start(adev);
+ if (ret)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ mutex_lock(&adev->pm.mutex);
+
+ if (adev->flags & AMD_IS_APU)
+ data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
+ else
+ data = RREG32_SMC(ixCURRENT_PG_STATUS);
+
+ if (data & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
+ DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
+ goto out;
+ }
+
+ /* AMD_CG_SUPPORT_UVD_MGCG */
+ data = RREG32(mmUVD_CGC_CTRL);
+ if (data & UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK)
+ *flags |= AMD_CG_SUPPORT_UVD_MGCG;
+
+out:
+ mutex_unlock(&adev->pm.mutex);
+}
+
+static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
+ .name = "uvd_v6_0",
+ .early_init = uvd_v6_0_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v6_0_sw_init,
+ .sw_fini = uvd_v6_0_sw_fini,
+ .hw_init = uvd_v6_0_hw_init,
+ .hw_fini = uvd_v6_0_hw_fini,
+ .suspend = uvd_v6_0_suspend,
+ .resume = uvd_v6_0_resume,
+ .is_idle = uvd_v6_0_is_idle,
+ .wait_for_idle = uvd_v6_0_wait_for_idle,
+ .check_soft_reset = uvd_v6_0_check_soft_reset,
+ .pre_soft_reset = uvd_v6_0_pre_soft_reset,
+ .soft_reset = uvd_v6_0_soft_reset,
+ .post_soft_reset = uvd_v6_0_post_soft_reset,
+ .set_clockgating_state = uvd_v6_0_set_clockgating_state,
+ .set_powergating_state = uvd_v6_0_set_powergating_state,
+ .get_clockgating_state = uvd_v6_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v6_0_ring_get_rptr,
+ .get_wptr = uvd_v6_0_ring_get_wptr,
+ .set_wptr = uvd_v6_0_ring_set_wptr,
+ .parse_cs = amdgpu_uvd_ring_parse_cs,
+ .emit_frame_size =
+ 6 + /* hdp invalidate */
+ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
+ 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
+ .emit_ib = uvd_v6_0_ring_emit_ib,
+ .emit_fence = uvd_v6_0_ring_emit_fence,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+ .test_ring = uvd_v6_0_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v6_0_ring_emit_wreg,
+};
+
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v6_0_ring_get_rptr,
+ .get_wptr = uvd_v6_0_ring_get_wptr,
+ .set_wptr = uvd_v6_0_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* hdp invalidate */
+ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
+ 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
+ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
+ .emit_ib = uvd_v6_0_ring_emit_ib,
+ .emit_fence = uvd_v6_0_ring_emit_fence,
+ .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
+ .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+ .test_ring = uvd_v6_0_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v6_0_ring_emit_wreg,
+};
+
+static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD_ENC,
+ .align_mask = 0x3f,
+ .nop = HEVC_ENC_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v6_0_enc_ring_get_rptr,
+ .get_wptr = uvd_v6_0_enc_ring_get_wptr,
+ .set_wptr = uvd_v6_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ 4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
+ 5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* uvd_v6_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
+ .emit_ib = uvd_v6_0_enc_ring_emit_ib,
+ .emit_fence = uvd_v6_0_enc_ring_emit_fence,
+ .emit_vm_flush = uvd_v6_0_enc_ring_emit_vm_flush,
+ .emit_pipeline_sync = uvd_v6_0_enc_ring_emit_pipeline_sync,
+ .test_ring = uvd_v6_0_enc_ring_test_ring,
+ .test_ib = uvd_v6_0_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = uvd_v6_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+};
+
+static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ if (adev->asic_type >= CHIP_POLARIS10) {
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
+ DRM_INFO("UVD is enabled in VM mode\n");
+ } else {
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
+ DRM_INFO("UVD is enabled in physical mode\n");
+ }
+}
+
+static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
+
+ DRM_INFO("UVD ENC is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
+ .set = uvd_v6_0_set_interrupt_state,
+ .process = uvd_v6_0_process_interrupt,
+};
+
+static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (uvd_v6_0_enc_support(adev))
+ adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
+ else
+ adev->uvd.inst->irq.num_types = 1;
+
+ adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version uvd_v6_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &uvd_v6_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version uvd_v6_2_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 6,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &uvd_v6_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version uvd_v6_3_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 6,
+ .minor = 3,
+ .rev = 0,
+ .funcs = &uvd_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.h b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.h
new file mode 100644
index 000000000..d3d48c642
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V6_0_H__
+#define __UVD_V6_0_H__
+
+extern const struct amdgpu_ip_block_version uvd_v6_0_ip_block;
+extern const struct amdgpu_ip_block_version uvd_v6_2_ip_block;
+extern const struct amdgpu_ip_block_version uvd_v6_3_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
new file mode 100644
index 000000000..86d1d46e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -0,0 +1,1917 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "mmsch_v1_0.h"
+
+#include "uvd/uvd_7_0_offset.h"
+#include "uvd/uvd_7_0_sh_mask.h"
+#include "vce/vce_4_0_offset.h"
+#include "vce/vce_4_0_default.h"
+#include "vce/vce_4_0_sh_mask.h"
+#include "nbif/nbif_6_1_offset.h"
+#include "mmhub/mmhub_1_0_offset.h"
+#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "ivsrcid/uvd/irqsrcs_uvd_7_0.h"
+
+#define mmUVD_PG0_CC_UVD_HARVESTING 0x00c7
+#define mmUVD_PG0_CC_UVD_HARVESTING_BASE_IDX 1
+//UVD_PG0_CC_UVD_HARVESTING
+#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE__SHIFT 0x1
+#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK 0x00000002L
+
+#define UVD7_MAX_HW_INSTANCES_VEGA20 2
+
+static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
+static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int uvd_v7_0_start(struct amdgpu_device *adev);
+static void uvd_v7_0_stop(struct amdgpu_device *adev);
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
+
+static int amdgpu_ih_clientid_uvds[] = {
+ SOC15_IH_CLIENTID_UVD,
+ SOC15_IH_CLIENTID_UVD1
+};
+
+/**
+ * uvd_v7_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v7_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+}
+
+/**
+ * uvd_v7_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v7_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ else
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+}
+
+/**
+ * uvd_v7_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v7_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ return;
+ }
+
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ else
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
+ lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v7_0_enc_ring_test_ring - test if UVD ENC ring is working
+ *
+ * @ring: the engine to test on
+ *
+ */
+static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
+ * @fence: optional fence to return
+ *
+ * Open up a stream for HW test
+ */
+static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
+{
+ const unsigned ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
+ * @fence: optional fence to return
+ *
+ * Close up a stream for HW test or if userspace failed to do so
+ */
+static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
+{
+ const unsigned ib_size_dw = 16;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002;
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working
+ *
+ * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ */
+static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
+ long r;
+
+ r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
+ if (r)
+ goto error;
+
+ r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ dma_fence_put(fence);
+ return r;
+}
+
+static int uvd_v7_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->asic_type == CHIP_VEGA20) {
+ u32 harvest;
+ int i;
+
+ adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ harvest = RREG32_SOC15(UVD, i, mmUVD_PG0_CC_UVD_HARVESTING);
+ if (harvest & UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK) {
+ adev->uvd.harvest_config |= 1 << i;
+ }
+ }
+ if (adev->uvd.harvest_config == (AMDGPU_UVD_HARVEST_UVD0 |
+ AMDGPU_UVD_HARVEST_UVD1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+ } else {
+ adev->uvd.num_uvd_inst = 1;
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ adev->uvd.num_enc_rings = 1;
+ else
+ adev->uvd.num_enc_rings = 2;
+ uvd_v7_0_set_ring_funcs(adev);
+ uvd_v7_0_set_enc_ring_funcs(adev);
+ uvd_v7_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v7_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+
+ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq);
+ if (r)
+ return r;
+
+ /* UVD ENC TRAP */
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq);
+ if (r)
+ return r;
+ }
+ }
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].ucode_id = AMDGPU_UCODE_ID_UVD;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].fw = adev->uvd.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->uvd.num_uvd_inst == UVD7_MAX_HW_INSTANCES_VEGA20) {
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].ucode_id = AMDGPU_UCODE_ID_UVD1;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].fw = adev->uvd.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ DRM_INFO("PSP loading UVD firmware\n");
+ }
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ if (!amdgpu_sriov_vf(adev)) {
+ ring = &adev->uvd.inst[j].ring;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "uvd_%d", ring->me);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst[j].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
+ if (amdgpu_sriov_vf(adev)) {
+ ring->use_doorbell = true;
+
+ /* currently only use the first enconding ring for
+ * sriov, so set unused location for other unused rings.
+ */
+ if (i == 0)
+ ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2;
+ else
+ ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1;
+ }
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->uvd.inst[j].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ }
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int uvd_v7_0_sw_fini(void *handle)
+{
+ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+/**
+ * uvd_v7_0_hw_init - start and test UVD block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v7_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int i, j, r;
+
+ if (amdgpu_sriov_vf(adev))
+ r = uvd_v7_0_sriov_start(adev);
+ else
+ r = uvd_v7_0_start(adev);
+ if (r)
+ goto done;
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ ring = &adev->uvd.inst[j].ring;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+ goto done;
+ }
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_TIMEOUT_STATUS), 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_CNTL), 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+ }
+
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+done:
+ if (!r)
+ DRM_INFO("UVD and UVD ENC initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * uvd_v7_0_hw_fini - stop the hardware block
+ *
+ * @handle: handle used to pass amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v7_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (!amdgpu_sriov_vf(adev))
+ uvd_v7_0_stop(adev);
+ else {
+ /* full access mode, so don't touch any UVD register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ }
+
+ return 0;
+}
+
+static int uvd_v7_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = uvd_v7_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v7_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v7_0_hw_init(adev);
+}
+
+/**
+ * uvd_v7_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
+ uint32_t offset;
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ i == 0 ?
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo :
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ i == 0 ?
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi :
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+ }
+}
+
+static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
+ uint32_t size;
+ int i;
+
+ size = header->header_size + header->vce_table_size + header->uvd_table_size;
+
+ /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID);
+ data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
+ *adev->uvd.inst[i].ring_enc[0].wptr_cpu_addr = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
+ }
+ /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
+ loop = 1000;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(10);
+ data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size, tmp;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { {0} };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
+ struct mmsch_v1_0_cmd_direct_polling direct_poll = { {0} };
+ struct mmsch_v1_0_cmd_end end = { {0} };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+ uint8_t i = 0;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) {
+ header->version = MMSCH_VERSION;
+ header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
+
+ if (header->vce_table_offset == 0 && header->vce_table_size == 0)
+ header->uvd_table_offset = header->header_size;
+ else
+ header->uvd_table_offset = header->vce_table_size + header->vce_table_offset;
+
+ init_table += header->uvd_table_offset;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ ring = &adev->uvd.inst[i].ring;
+ ring->wptr = 0;
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ 0xFFFFFFFF, 0x00000004);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ offset = 0;
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+ /* mc resume end*/
+
+ /* disable clock gating */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+ /* disable interupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ /* stall UMC and register bus before resetting VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+ /* initialize UVD memory controller */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
+ (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L));
+
+ /* take all subblocks out of reset, except VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable VCPU clock */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable master interrupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+ /* clear the bit 4 of UVD_STATUS */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+ /* force RBC into idle state */
+ size = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ ring = &adev->uvd.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+ /* boot up the VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
+
+ /* enable UMC */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
+ }
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ header->uvd_table_size = table_size;
+
+ }
+ return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+/**
+ * uvd_v7_0_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v7_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ uint32_t mp_swap_cntl;
+ int i, j, k, r;
+
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ if (adev->uvd.harvest_config & (1 << k))
+ continue;
+ /* disable DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ }
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+ mp_swap_cntl = 0;
+
+ uvd_v7_0_mc_resume(adev);
+
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ if (adev->uvd.harvest_config & (1 << k))
+ continue;
+ ring = &adev->uvd.inst[k].ring;
+ /* disable clock gating */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
+
+ /* disable interupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
+ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L);
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
+
+ /* take all subblocks out of reset, except VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
+ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* boot up the VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+ /* clear the bit 4 of UVD_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ ring = &adev->uvd.inst[k].ring_enc[0];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->uvd.inst[k].ring_enc[1];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
+ return 0;
+}
+
+/**
+ * uvd_v7_0_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v7_0_stop(struct amdgpu_device *adev)
+{
+ uint8_t i = 0;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ /* force RBC into idle state */
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
+
+ /* put VCPU into reset */
+ WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* disable VCPU clock */
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
+
+ /* Unstall UMC and register bus */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ }
+}
+
+/**
+ * uvd_v7_0_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write enc a fence and a trap command to the ring.
+ */
+static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP);
+}
+
+/**
+ * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
+ * uvd_v7_0_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
+ *
+ * @p: the CS parser with the IBs
+ * @job: which job this ib is in
+ * @ib: which IB to patch
+ *
+ */
+static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ unsigned i;
+
+ /* No patching necessary for the first instance */
+ if (!ring->me)
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+
+ reg -= p->adev->reg_offset[UVD_HWIP][0][1];
+ reg += p->adev->reg_offset[UVD_HWIP][1][1];
+
+ amdgpu_ib_set_value(ib, i, reg);
+ }
+ return 0;
+}
+
+/**
+ * uvd_v7_0_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrive vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 8);
+}
+
+static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 12);
+}
+
+static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
+}
+
+static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
+ vmid * hub->ctx_addr_distance,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+#if 0
+static bool uvd_v7_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v7_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (uvd_v7_0_is_idle(handle))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
+static bool uvd_v7_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
+ REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
+ (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
+ AMDGPU_UVD_STATUS_BUSY_MASK))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
+
+ if (srbm_soft_reset) {
+ adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->uvd.inst[ring->me].srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int uvd_v7_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+
+ uvd_v7_0_stop(adev);
+ return 0;
+}
+
+static int uvd_v7_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int uvd_v7_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return uvd_v7_0_start(adev);
+}
+#endif
+
+static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_UVD:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_UVD1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: UVD TRAP\n");
+
+ switch (entry->src_id) {
+ case 124:
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
+ break;
+ case 119:
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
+ break;
+ case 120:
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+#if 0
+static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data1, data2, suvd_flags;
+
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+ data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
+
+ data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
+ UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+
+ suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK;
+
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
+ (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
+
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__JPEG_MODE_MASK |
+ UVD_CGC_CTRL__JPEG2_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
+ UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
+ data1 |= suvd_flags;
+
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
+}
+
+static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data1, cgc_flags, suvd_flags;
+
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+
+ cgc_flags = UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__SCPU_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__JPEG2_MASK;
+
+ suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK;
+
+ data |= cgc_flags;
+ data1 |= suvd_flags;
+
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+}
+
+static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
+
+ if (enable)
+ tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
+ GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
+ else
+ tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
+ GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
+
+ WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
+}
+
+
+static int uvd_v7_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ uvd_v7_0_set_bypass_mode(adev, enable);
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+ return 0;
+
+ if (enable) {
+ /* disable HW gating and enable Sw gating */
+ uvd_v7_0_set_sw_clock_gating(adev);
+ } else {
+ /* wait for STATUS to clear */
+ if (uvd_v7_0_wait_for_idle(handle))
+ return -EBUSY;
+
+ /* enable HW gates because UVD is idle */
+ /* uvd_v7_0_set_hw_clock_gating(adev); */
+ }
+
+ return 0;
+}
+
+static int uvd_v7_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the UVD block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+ return 0;
+
+ WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+
+ if (state == AMD_PG_STATE_GATE) {
+ uvd_v7_0_stop(adev);
+ return 0;
+ } else {
+ return uvd_v7_0_start(adev);
+ }
+}
+#endif
+
+static int uvd_v7_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ /* needed for driver unload*/
+ return 0;
+}
+
+const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
+ .name = "uvd_v7_0",
+ .early_init = uvd_v7_0_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v7_0_sw_init,
+ .sw_fini = uvd_v7_0_sw_fini,
+ .hw_init = uvd_v7_0_hw_init,
+ .hw_fini = uvd_v7_0_hw_fini,
+ .suspend = uvd_v7_0_suspend,
+ .resume = uvd_v7_0_resume,
+ .is_idle = NULL /* uvd_v7_0_is_idle */,
+ .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
+ .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
+ .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
+ .soft_reset = NULL /* uvd_v7_0_soft_reset */,
+ .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
+ .set_clockgating_state = uvd_v7_0_set_clockgating_state,
+ .set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
+};
+
+static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v7_0_ring_get_rptr,
+ .get_wptr = uvd_v7_0_ring_get_wptr,
+ .set_wptr = uvd_v7_0_ring_set_wptr,
+ .patch_cs_in_place = uvd_v7_0_ring_patch_cs_in_place,
+ .emit_frame_size =
+ 6 + /* hdp invalidate */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* uvd_v7_0_ring_emit_vm_flush */
+ 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
+ .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
+ .emit_ib = uvd_v7_0_ring_emit_ib,
+ .emit_fence = uvd_v7_0_ring_emit_fence,
+ .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
+ .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
+ .test_ring = uvd_v7_0_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v7_0_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v7_0_ring_emit_wreg,
+ .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD_ENC,
+ .align_mask = 0x3f,
+ .nop = HEVC_ENC_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v7_0_enc_ring_get_rptr,
+ .get_wptr = uvd_v7_0_enc_ring_get_wptr,
+ .set_wptr = uvd_v7_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ 3 + 3 + /* hdp flush / invalidate */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* uvd_v7_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
+ .emit_ib = uvd_v7_0_enc_ring_emit_ib,
+ .emit_fence = uvd_v7_0_enc_ring_emit_fence,
+ .emit_vm_flush = uvd_v7_0_enc_ring_emit_vm_flush,
+ .test_ring = uvd_v7_0_enc_ring_test_ring,
+ .test_ib = uvd_v7_0_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = uvd_v7_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
+ .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
+ adev->uvd.inst[i].ring.me = i;
+ DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+ }
+}
+
+static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+ adev->uvd.inst[j].ring_enc[i].me = j;
+ }
+
+ DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+ }
+}
+
+static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
+ .set = uvd_v7_0_set_interrupt_state,
+ .process = uvd_v7_0_process_interrupt,
+};
+
+static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
+ adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
+ }
+}
+
+const struct amdgpu_ip_block_version uvd_v7_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &uvd_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h
new file mode 100644
index 000000000..cbe82ab32
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V7_0_H__
+#define __UVD_V7_0_H__
+
+extern const struct amdgpu_ip_block_version uvd_v7_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
new file mode 100644
index 000000000..67eb01fef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "cikd.h"
+#include "vce/vce_2_0_d.h"
+#include "vce/vce_2_0_sh_mask.h"
+#include "smu/smu_7_0_1_d.h"
+#include "smu/smu_7_0_1_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#define VCE_V2_0_FW_SIZE (256 * 1024)
+#define VCE_V2_0_STACK_SIZE (64 * 1024)
+#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES)
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+
+/**
+ * vce_v2_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_RPTR);
+ else
+ return RREG32(mmVCE_RB_RPTR2);
+}
+
+/**
+ * vce_v2_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(mmVCE_RB_WPTR);
+ else
+ return RREG32(mmVCE_RB_WPTR2);
+}
+
+/**
+ * vce_v2_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ else
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+}
+
+static int vce_v2_0_lmi_clean(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ uint32_t status = RREG32(mmVCE_LMI_STATUS);
+
+ if (status & 0x337f)
+ return 0;
+ mdelay(10);
+ }
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int vce_v2_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ uint32_t status = RREG32(mmVCE_STATUS);
+
+ if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
+ WREG32_P(mmVCE_SOFT_RESET,
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmVCE_SOFT_RESET, 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static void vce_v2_0_disable_cg(struct amdgpu_device *adev)
+{
+ WREG32(mmVCE_CGTT_CLK_OVERRIDE, 7);
+}
+
+static void vce_v2_0_init_cg(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_A);
+ tmp &= ~0xfff;
+ tmp |= ((0 << 0) | (4 << 4));
+ tmp |= 0x40000;
+ WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0xfff;
+ tmp |= ((0 << 0) | (4 << 4));
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0x10;
+ tmp &= ~0x100000;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+}
+
+static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size, offset;
+
+ WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(mmVCE_CLOCK_GATING_B, 0xf7);
+
+ WREG32(mmVCE_LMI_CTRL, 0x00398000);
+ WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+ WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+ WREG32(mmVCE_LMI_VM_CTRL, 0);
+
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+
+ offset = AMDGPU_VCE_FIRMWARE_OFFSET;
+ size = VCE_V2_0_FW_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+ offset += size;
+ size = VCE_V2_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+
+ offset += size;
+ size = VCE_V2_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+ WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
+}
+
+static bool vce_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+}
+
+static int vce_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (vce_v2_0_is_idle(handle))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+/**
+ * vce_v2_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* set BUSY flag */
+ WREG32_P(mmVCE_STATUS, 1, ~1);
+
+ vce_v2_0_init_cg(adev);
+ vce_v2_0_disable_cg(adev);
+
+ vce_v2_0_mc_resume(adev);
+
+ ring = &adev->vce.ring[0];
+ WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+ WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+ WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+ WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+ WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
+ mdelay(100);
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
+
+ r = vce_v2_0_firmware_loaded(adev);
+
+ /* clear BUSY flag */
+ WREG32_P(mmVCE_STATUS, 0, ~1);
+
+ if (r) {
+ DRM_ERROR("VCE not responding, giving up!!!\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int vce_v2_0_stop(struct amdgpu_device *adev)
+{
+ int i;
+ int status;
+
+ if (vce_v2_0_lmi_clean(adev)) {
+ DRM_INFO("vce is not idle \n");
+ return 0;
+ }
+
+ if (vce_v2_0_wait_for_idle(adev)) {
+ DRM_INFO("VCE is busy, Can't set clock gating");
+ return 0;
+ }
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 100; ++i) {
+ status = RREG32(mmVCE_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+
+ WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x80001);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32_P(mmVCE_SOFT_RESET, 1, ~0x1);
+
+ WREG32(mmVCE_STATUS, 0);
+
+ return 0;
+}
+
+static void vce_v2_0_set_sw_cg(struct amdgpu_device *adev, bool gated)
+{
+ u32 tmp;
+
+ if (gated) {
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0xe70000;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp |= 0xff000000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3fc;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+
+ WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
+ } else {
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp |= 0xe7;
+ tmp &= ~0xe70000;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp |= 0x1fe000;
+ tmp &= ~0xff000000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp |= 0x3fc;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+ }
+}
+
+static void vce_v2_0_set_dyn_cg(struct amdgpu_device *adev, bool gated)
+{
+ u32 orig, tmp;
+
+/* LMI_MC/LMI_UMC always set in dynamic,
+ * set {CGC_*_GATE_MODE, CGC_*_SW_GATE} = {0, 0}
+ */
+ tmp = RREG32(mmVCE_CLOCK_GATING_B);
+ tmp &= ~0x00060006;
+
+/* Exception for ECPU, IH, SEM, SYS blocks needs to be turned on/off by SW */
+ if (gated) {
+ tmp |= 0xe10000;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+ } else {
+ tmp |= 0xe1;
+ tmp &= ~0xe10000;
+ WREG32(mmVCE_CLOCK_GATING_B, tmp);
+ }
+
+ orig = tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+ tmp &= ~0x1fe000;
+ tmp &= ~0xff000000;
+ if (tmp != orig)
+ WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+ orig = tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3fc;
+ if (tmp != orig)
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+
+ /* set VCE_UENC_REG_CLOCK_GATING always in dynamic mode */
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, 0x00);
+
+ if(gated)
+ WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
+}
+
+static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
+ bool sw_cg)
+{
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
+ if (sw_cg)
+ vce_v2_0_set_sw_cg(adev, true);
+ else
+ vce_v2_0_set_dyn_cg(adev, true);
+ } else {
+ vce_v2_0_disable_cg(adev);
+
+ if (sw_cg)
+ vce_v2_0_set_sw_cg(adev, false);
+ else
+ vce_v2_0_set_dyn_cg(adev, false);
+ }
+}
+
+static int vce_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->vce.num_rings = 2;
+
+ vce_v2_0_set_ring_funcs(adev);
+ vce_v2_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int vce_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* VCE */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_sw_init(adev, VCE_V2_0_FW_SIZE +
+ VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ sprintf(ring->name, "vce%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vce_entity_init(adev);
+
+ return r;
+}
+
+static int vce_v2_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+static int vce_v2_0_hw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+ vce_v2_0_enable_mgcg(adev, true, false);
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ return 0;
+}
+
+static int vce_v2_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+
+ return vce_v2_0_hw_init(adev);
+}
+
+static int vce_v2_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1);
+ mdelay(5);
+
+ return vce_v2_0_start(adev);
+}
+
+static int vce_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ return 0;
+}
+
+static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCE\n");
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vce_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ bool sw_cg = false;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_CG_STATE_GATE) {
+ gate = true;
+ sw_cg = true;
+ }
+
+ vce_v2_0_enable_mgcg(adev, gate, sw_cg);
+
+ return 0;
+}
+
+static int vce_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ return vce_v2_0_stop(adev);
+ else
+ return vce_v2_0_start(adev);
+}
+
+static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
+ .name = "vce_v2_0",
+ .early_init = vce_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = vce_v2_0_sw_init,
+ .sw_fini = vce_v2_0_sw_fini,
+ .hw_init = vce_v2_0_hw_init,
+ .hw_fini = vce_v2_0_hw_fini,
+ .suspend = vce_v2_0_suspend,
+ .resume = vce_v2_0_resume,
+ .is_idle = vce_v2_0_is_idle,
+ .wait_for_idle = vce_v2_0_wait_for_idle,
+ .soft_reset = vce_v2_0_soft_reset,
+ .set_clockgating_state = vce_v2_0_set_clockgating_state,
+ .set_powergating_state = vce_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v2_0_ring_get_rptr,
+ .get_wptr = vce_v2_0_ring_get_wptr,
+ .set_wptr = vce_v2_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs,
+ .emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib = amdgpu_vce_ring_emit_ib,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v2_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs vce_v2_0_irq_funcs = {
+ .set = vce_v2_0_set_interrupt_state,
+ .process = vce_v2_0_process_interrupt,
+};
+
+static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v2_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v2_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.h
new file mode 100644
index 000000000..4d1516765
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V2_0_H__
+#define __VCE_V2_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v2_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
new file mode 100644
index 000000000..18f6e62af
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -0,0 +1,1023 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "vid.h"
+#include "vce/vce_3_0_d.h"
+#include "vce/vce_3_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "smu/smu_7_1_2_d.h"
+#include "smu/smu_7_1_2_sh_mask.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+
+#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
+#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10
+#define GRBM_GFX_INDEX__VCE_ALL_PIPE 0x07
+
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
+#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000
+
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+#define VCE_V3_0_FW_SIZE (384 * 1024)
+#define VCE_V3_0_STACK_SIZE (64 * 1024)
+#define VCE_V3_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
+
+#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
+
+#define GET_VCE_INSTANCE(i) ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
+ | GRBM_GFX_INDEX__VCE_ALL_PIPE)
+
+static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
+static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vce_v3_0_wait_for_idle(void *handle);
+static int vce_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+/**
+ * vce_v3_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 v;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (adev->vce.harvest_config == 0 ||
+ adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+ else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+
+ if (ring->me == 0)
+ v = RREG32(mmVCE_RB_RPTR);
+ else if (ring->me == 1)
+ v = RREG32(mmVCE_RB_RPTR2);
+ else
+ v = RREG32(mmVCE_RB_RPTR3);
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return v;
+}
+
+/**
+ * vce_v3_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 v;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (adev->vce.harvest_config == 0 ||
+ adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+ else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+
+ if (ring->me == 0)
+ v = RREG32(mmVCE_RB_WPTR);
+ else if (ring->me == 1)
+ v = RREG32(mmVCE_RB_WPTR2);
+ else
+ v = RREG32(mmVCE_RB_WPTR3);
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return v;
+}
+
+/**
+ * vce_v3_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (adev->vce.harvest_config == 0 ||
+ adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+ else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+
+ if (ring->me == 0)
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ else if (ring->me == 1)
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ else
+ WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
+{
+ WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
+}
+
+static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
+ bool gated)
+{
+ u32 data;
+
+ /* Set Override to disable Clock Gating */
+ vce_v3_0_override_vce_clock_gating(adev, true);
+
+ /* This function enables MGCG which is controlled by firmware.
+ With the clocks in the gated state the core is still
+ accessible but the firmware will throttle the clocks on the
+ fly as necessary.
+ */
+ if (!gated) {
+ data = RREG32(mmVCE_CLOCK_GATING_B);
+ data |= 0x1ff;
+ data &= ~0xef0000;
+ WREG32(mmVCE_CLOCK_GATING_B, data);
+
+ data = RREG32(mmVCE_UENC_CLOCK_GATING);
+ data |= 0x3ff000;
+ data &= ~0xffc00000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, data);
+
+ data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
+ data |= 0x2;
+ data &= ~0x00010000;
+ WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
+
+ data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ data |= 0x37f;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
+
+ data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
+ data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
+ 0x8;
+ WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
+ } else {
+ data = RREG32(mmVCE_CLOCK_GATING_B);
+ data &= ~0x80010;
+ data |= 0xe70008;
+ WREG32(mmVCE_CLOCK_GATING_B, data);
+
+ data = RREG32(mmVCE_UENC_CLOCK_GATING);
+ data |= 0xffc00000;
+ WREG32(mmVCE_UENC_CLOCK_GATING, data);
+
+ data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
+ data |= 0x10000;
+ WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
+
+ data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+ data &= ~0x3ff;
+ WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
+
+ data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
+ data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
+ 0x8);
+ WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
+ }
+ vce_v3_0_override_vce_clock_gating(adev, false);
+}
+
+static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ uint32_t status = RREG32(mmVCE_STATUS);
+
+ if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
+ mdelay(10);
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
+ mdelay(10);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * vce_v3_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v3_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int idx, r;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (idx = 0; idx < 2; ++idx) {
+ if (adev->vce.harvest_config & (1 << idx))
+ continue;
+
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
+
+ /* Program instance 0 reg space for two instances or instance 0 case
+ program instance 1 reg space for only instance 1 available case */
+ if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
+ ring = &adev->vce.ring[0];
+ WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+ WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+ WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+ WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+ ring = &adev->vce.ring[2];
+ WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+ WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
+ WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
+ WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
+ }
+
+ vce_v3_0_mc_resume(adev, idx);
+ WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
+
+ if (adev->asic_type >= CHIP_STONEY)
+ WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
+ else
+ WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
+
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
+ mdelay(100);
+
+ r = vce_v3_0_firmware_loaded(adev);
+
+ /* clear BUSY flag */
+ WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
+
+ if (r) {
+ DRM_ERROR("VCE not responding, giving up!!!\n");
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return r;
+ }
+ }
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static int vce_v3_0_stop(struct amdgpu_device *adev)
+{
+ int idx;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (idx = 0; idx < 2; ++idx) {
+ if (adev->vce.harvest_config & (1 << idx))
+ continue;
+
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
+
+ if (adev->asic_type >= CHIP_STONEY)
+ WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
+ else
+ WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
+
+ /* hold on ECPU */
+ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
+
+ /* clear VCE STATUS */
+ WREG32(mmVCE_STATUS, 0);
+ }
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS 0xC0014074
+#define VCE_HARVEST_FUSE_MACRO__SHIFT 27
+#define VCE_HARVEST_FUSE_MACRO__MASK 0x18000000
+
+static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if ((adev->asic_type == CHIP_FIJI) ||
+ (adev->asic_type == CHIP_STONEY))
+ return AMDGPU_VCE_HARVEST_VCE1;
+
+ if (adev->flags & AMD_IS_APU)
+ tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
+ VCE_HARVEST_FUSE_MACRO__MASK) >>
+ VCE_HARVEST_FUSE_MACRO__SHIFT;
+ else
+ tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
+ CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
+ CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
+
+ switch (tmp) {
+ case 1:
+ return AMDGPU_VCE_HARVEST_VCE0;
+ case 2:
+ return AMDGPU_VCE_HARVEST_VCE1;
+ case 3:
+ return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
+ default:
+ if ((adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_POLARIS11) ||
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM))
+ return AMDGPU_VCE_HARVEST_VCE1;
+
+ return 0;
+ }
+}
+
+static int vce_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
+
+ if ((adev->vce.harvest_config &
+ (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
+ (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
+ return -ENOENT;
+
+ adev->vce.num_rings = 3;
+
+ vce_v3_0_set_ring_funcs(adev);
+ vce_v3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int vce_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ /* VCE */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
+ (VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
+ if (r)
+ return r;
+
+ /* 52.8.3 required for 3 ring support */
+ if (adev->vce.fw_version < FW_52_8_3)
+ adev->vce.num_rings = 2;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ sprintf(ring->name, "vce%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vce_entity_init(adev);
+
+ return r;
+}
+
+static int vce_v3_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+static int vce_v3_0_hw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vce_v3_0_override_vce_clock_gating(adev, true);
+
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v3_0_hw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ r = vce_v3_0_wait_for_idle(handle);
+ if (r)
+ return r;
+
+ vce_v3_0_stop(adev);
+ return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+}
+
+static int vce_v3_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v3_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+
+ return vce_v3_0_hw_init(adev);
+}
+
+static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
+{
+ uint32_t offset, size;
+
+ WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);
+
+ WREG32(mmVCE_LMI_CTRL, 0x00398000);
+ WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+ WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+ WREG32(mmVCE_LMI_VM_CTRL, 0);
+ WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
+
+ if (adev->asic_type >= CHIP_STONEY) {
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
+ } else
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+ offset = AMDGPU_VCE_FIRMWARE_OFFSET;
+ size = VCE_V3_0_FW_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+ if (idx == 0) {
+ offset += size;
+ size = VCE_V3_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+ offset += size;
+ size = VCE_V3_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+ } else {
+ offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
+ size = VCE_V3_0_STACK_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+ offset += size;
+ size = VCE_V3_0_DATA_SIZE;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
+ WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+ }
+
+ WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+ WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
+}
+
+static bool vce_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 mask = 0;
+
+ mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
+ mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
+
+ return !(RREG32(mmSRBM_STATUS2) & mask);
+}
+
+static int vce_v3_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++)
+ if (vce_v3_0_is_idle(handle))
+ return 0;
+
+ return -ETIMEDOUT;
+}
+
+#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
+#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
+#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
+#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
+ VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
+
+static bool vce_v3_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+
+ /* According to VCE team , we should use VCE_STATUS instead
+ * SRBM_STATUS.VCE_BUSY bit for busy status checking.
+ * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
+ * instance's registers are accessed
+ * (0 for 1st instance, 10 for 2nd instance).
+ *
+ *VCE_STATUS
+ *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
+ *|----+----+-----------+----+----+----+----------+---------+----|
+ *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
+ *
+ * VCE team suggest use bit 3--bit 6 for busy status check
+ */
+ mutex_lock(&adev->grbm_idx_mutex);
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+ if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
+ }
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+ if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
+ }
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ if (srbm_soft_reset) {
+ adev->vce.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->vce.srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int vce_v3_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->vce.srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int vce_v3_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return vce_v3_0_suspend(adev);
+}
+
+
+static int vce_v3_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return vce_v3_0_resume(adev);
+}
+
+static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ return 0;
+}
+
+static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCE\n");
+
+ WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
+
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ case 2:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vce_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
+ return 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < 2; i++) {
+ /* Program VCE Instance 0 or 1 if not harvested */
+ if (adev->vce.harvest_config & (1 << i))
+ continue;
+
+ WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
+
+ if (!enable) {
+ /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
+ uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
+ data &= ~(0xf | 0xff0);
+ data |= ((0x0 << 0) | (0x04 << 4));
+ WREG32(mmVCE_CLOCK_GATING_A, data);
+
+ /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
+ data = RREG32(mmVCE_UENC_CLOCK_GATING);
+ data &= ~(0xf | 0xff0);
+ data |= ((0x0 << 0) | (0x04 << 4));
+ WREG32(mmVCE_UENC_CLOCK_GATING, data);
+ }
+
+ vce_v3_0_set_vce_sw_clock_gating(adev, enable);
+ }
+
+ WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static int vce_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ if (state == AMD_PG_STATE_GATE) {
+ ret = vce_v3_0_stop(adev);
+ if (ret)
+ goto out;
+ } else {
+ ret = vce_v3_0_start(adev);
+ if (ret)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ mutex_lock(&adev->pm.mutex);
+
+ if (adev->flags & AMD_IS_APU)
+ data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
+ else
+ data = RREG32_SMC(ixCURRENT_PG_STATUS);
+
+ if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
+ DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
+ goto out;
+ }
+
+ WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
+
+ /* AMD_CG_SUPPORT_VCE_MGCG */
+ data = RREG32(mmVCE_CLOCK_GATING_A);
+ if (data & (0x04 << 4))
+ *flags |= AMD_CG_SUPPORT_VCE_MGCG;
+
+out:
+ mutex_unlock(&adev->pm.mutex);
+}
+
+static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCE_CMD_IB_VM);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, pd_addr >> 12);
+
+ amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, VCE_CMD_END);
+}
+
+static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+}
+
+static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
+ .name = "vce_v3_0",
+ .early_init = vce_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = vce_v3_0_sw_init,
+ .sw_fini = vce_v3_0_sw_fini,
+ .hw_init = vce_v3_0_hw_init,
+ .hw_fini = vce_v3_0_hw_fini,
+ .suspend = vce_v3_0_suspend,
+ .resume = vce_v3_0_resume,
+ .is_idle = vce_v3_0_is_idle,
+ .wait_for_idle = vce_v3_0_wait_for_idle,
+ .check_soft_reset = vce_v3_0_check_soft_reset,
+ .pre_soft_reset = vce_v3_0_pre_soft_reset,
+ .soft_reset = vce_v3_0_soft_reset,
+ .post_soft_reset = vce_v3_0_post_soft_reset,
+ .set_clockgating_state = vce_v3_0_set_clockgating_state,
+ .set_powergating_state = vce_v3_0_set_powergating_state,
+ .get_clockgating_state = vce_v3_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v3_0_ring_get_rptr,
+ .get_wptr = vce_v3_0_ring_get_wptr,
+ .set_wptr = vce_v3_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs,
+ .emit_frame_size =
+ 4 + /* vce_v3_0_emit_pipeline_sync */
+ 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+ .emit_ib = amdgpu_vce_ring_emit_ib,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0xf,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v3_0_ring_get_rptr,
+ .get_wptr = vce_v3_0_ring_get_wptr,
+ .set_wptr = vce_v3_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .emit_frame_size =
+ 6 + /* vce_v3_0_emit_vm_flush */
+ 4 + /* vce_v3_0_emit_pipeline_sync */
+ 6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
+ .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
+ .emit_ib = vce_v3_0_ring_emit_ib,
+ .emit_vm_flush = vce_v3_0_emit_vm_flush,
+ .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
+ .emit_fence = amdgpu_vce_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+};
+
+static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (adev->asic_type >= CHIP_STONEY) {
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
+ DRM_INFO("VCE enabled in VM mode\n");
+ } else {
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
+ adev->vce.ring[i].me = i;
+ }
+ DRM_INFO("VCE enabled in physical mode\n");
+ }
+}
+
+static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
+ .set = vce_v3_0_set_interrupt_state,
+ .process = vce_v3_0_process_interrupt,
+};
+
+static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v3_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &vce_v3_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 3,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &vce_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.h
new file mode 100644
index 000000000..08b908c7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V3_0_H__
+#define __VCE_V3_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v3_0_ip_block;
+extern const struct amdgpu_ip_block_version vce_v3_1_ip_block;
+extern const struct amdgpu_ip_block_version vce_v3_4_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
new file mode 100644
index 000000000..e0b70cd3b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -0,0 +1,1162 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vce.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "mmsch_v1_0.h"
+
+#include "vce/vce_4_0_offset.h"
+#include "vce/vce_4_0_default.h"
+#include "vce/vce_4_0_sh_mask.h"
+#include "mmhub/mmhub_1_0_offset.h"
+#include "mmhub/mmhub_1_0_sh_mask.h"
+
+#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
+
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
+
+#define VCE_V4_0_FW_SIZE (384 * 1024)
+#define VCE_V4_0_STACK_SIZE (64 * 1024)
+#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
+
+static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
+static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
+static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+
+/**
+ * vce_v4_0_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->me == 0)
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
+ else if (ring->me == 1)
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
+ else
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
+}
+
+/**
+ * vce_v4_0_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+
+ if (ring->me == 0)
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
+ else if (ring->me == 1)
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
+ else
+ return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
+}
+
+/**
+ * vce_v4_0_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ return;
+ }
+
+ if (ring->me == 0)
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
+ lower_32_bits(ring->wptr));
+ else if (ring->me == 1)
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
+ lower_32_bits(ring->wptr));
+ else
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
+ lower_32_bits(ring->wptr));
+}
+
+static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ uint32_t status =
+ RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
+
+ if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+ return 0;
+ mdelay(10);
+ }
+
+ DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(10);
+
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
+ uint32_t size;
+
+ size = header->header_size + header->vce_table_size + header->uvd_table_size;
+
+ /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
+ data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
+
+ /* 4, set resp to zero */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
+
+ WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
+ *adev->vce.ring[0].wptr_cpu_addr = 0;
+ adev->vce.ring[0].wptr = 0;
+ adev->vce.ring[0].wptr_old = 0;
+
+ /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+ loop = 1000;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(10);
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+ struct mmsch_v1_0_cmd_end end = { { 0 } };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
+ header->version = MMSCH_VERSION;
+ header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
+
+ if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
+ header->vce_table_offset = header->header_size;
+ else
+ header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
+
+ init_table += header->vce_table_offset;
+
+ ring = &adev->vce.ring[0];
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
+ ring->ring_size / 4);
+
+ /* BEGING OF MC_RESUME */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
+
+ offset = AMDGPU_VCE_FIRMWARE_OFFSET;
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
+ uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
+ uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
+ (tmr_mc_addr >> 40) & 0xff);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+ adev->vce.gpu_addr >> 8);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
+ (adev->vce.gpu_addr >> 40) & 0xff);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
+ offset & ~0x0f000000);
+
+ }
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
+ adev->vce.gpu_addr >> 8);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
+ (adev->vce.gpu_addr >> 40) & 0xff);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
+ adev->vce.gpu_addr >> 8);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
+ mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
+ (adev->vce.gpu_addr >> 40) & 0xff);
+
+ size = VCE_V4_0_FW_SIZE;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
+
+ offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
+ size = VCE_V4_0_STACK_SIZE;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
+ (offset & ~0x0f000000) | (1 << 24));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
+
+ offset += size;
+ size = VCE_V4_0_DATA_SIZE;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
+ (offset & ~0x0f000000) | (2 << 24));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
+ VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
+ VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+
+ /* end of MC_RESUME */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+ VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
+ ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
+
+ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+ VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
+ VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
+
+ /* clear BUSY flag */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
+ ~VCE_STATUS__JOB_BUSY_MASK, 0);
+
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ header->vce_table_size = table_size;
+ }
+
+ return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+/**
+ * vce_v4_0_start - start VCE block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCE block
+ */
+static int vce_v4_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->vce.ring[0];
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
+
+ ring = &adev->vce.ring[1];
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
+
+ ring = &adev->vce.ring[2];
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
+
+ vce_v4_0_mc_resume(adev);
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
+ ~VCE_STATUS__JOB_BUSY_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
+
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+ mdelay(100);
+
+ r = vce_v4_0_firmware_loaded(adev);
+
+ /* clear BUSY flag */
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
+
+ if (r) {
+ DRM_ERROR("VCE not responding, giving up!!!\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static int vce_v4_0_stop(struct amdgpu_device *adev)
+{
+
+ /* Disable VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
+
+ /* hold on ECPU */
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
+ VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+ ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+
+ /* clear VCE_STATUS */
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
+
+ /* Set Clock-Gating off */
+ /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
+ vce_v4_0_set_vce_sw_clock_gating(adev, false);
+ */
+
+ return 0;
+}
+
+static int vce_v4_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
+ adev->vce.num_rings = 1;
+ else
+ adev->vce.num_rings = 3;
+
+ vce_v4_0_set_ring_funcs(adev);
+ vce_v4_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int vce_v4_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+
+ unsigned size;
+ int r, i;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
+ if (r)
+ return r;
+
+ size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ size += VCE_V4_0_FW_SIZE;
+
+ r = amdgpu_vce_sw_init(adev, size);
+ if (r)
+ return r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
+
+ adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vce.saved_bo)
+ return -ENOMEM;
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ DRM_INFO("PSP loading VCE firmware\n");
+ } else {
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
+
+ ring = &adev->vce.ring[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vce%d", i);
+ if (amdgpu_sriov_vf(adev)) {
+ /* DOORBELL only works under SRIOV */
+ ring->use_doorbell = true;
+
+ /* currently only use the first encoding ring for sriov,
+ * so set unused location for other unused rings.
+ */
+ if (i == 0)
+ ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
+ else
+ ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
+ }
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+
+ r = amdgpu_vce_entity_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+static int vce_v4_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* free MM table */
+ amdgpu_virt_free_mm_table(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ kvfree(adev->vce.saved_bo);
+ adev->vce.saved_bo = NULL;
+ }
+
+ r = amdgpu_vce_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_sw_fini(adev);
+}
+
+static int vce_v4_0_hw_init(void *handle)
+{
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ r = vce_v4_0_sriov_start(adev);
+ else
+ r = vce_v4_0_start(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("VCE initialized successfully.\n");
+
+ return 0;
+}
+
+static int vce_v4_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* vce_v4_0_wait_for_idle(handle); */
+ vce_v4_0_stop(adev);
+ } else {
+ /* full access mode, so don't touch any VCE register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ }
+
+ return 0;
+}
+
+static int vce_v4_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r, idx;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return 0;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ void *ptr = adev->vce.cpu_addr;
+
+ memcpy_fromio(adev->vce.saved_bo, ptr, size);
+ }
+ drm_dev_exit(idx);
+ }
+
+ /*
+ * Proper cleanups before halting the HW engine:
+ * - cancel the delayed idle work
+ * - enable powergating
+ * - enable clockgating
+ * - disable dpm
+ *
+ * TODO: to align with the VCN implementation, move the
+ * jobs for clockgating/powergating/dpm setting to
+ * ->set_powergating_state().
+ */
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+
+ r = vce_v4_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_vce_suspend(adev);
+}
+
+static int vce_v4_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r, idx;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return -EINVAL;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
+ void *ptr = adev->vce.cpu_addr;
+
+ memcpy_toio(ptr, adev->vce.saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ } else {
+ r = amdgpu_vce_resume(adev);
+ if (r)
+ return r;
+ }
+
+ return vce_v4_0_hw_init(adev);
+}
+
+static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t offset, size;
+ uint64_t tmr_mc_addr;
+
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
+
+ offset = AMDGPU_VCE_FIRMWARE_OFFSET;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+ (tmr_mc_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
+ (tmr_mc_addr >> 40) & 0xff);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
+ (adev->vce.gpu_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
+ (adev->vce.gpu_addr >> 40) & 0xff);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
+ }
+
+ size = VCE_V4_0_FW_SIZE;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
+ offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
+ size = VCE_V4_0_STACK_SIZE;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
+
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
+ offset += size;
+ size = VCE_V4_0_DATA_SIZE;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
+
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
+ VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
+ ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+}
+
+static int vce_v4_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ /* needed for driver unload*/
+ return 0;
+}
+
+#if 0
+static bool vce_v4_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 mask = 0;
+
+ mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
+ mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
+
+ return !(RREG32(mmSRBM_STATUS2) & mask);
+}
+
+static int vce_v4_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++)
+ if (vce_v4_0_is_idle(handle))
+ return 0;
+
+ return -ETIMEDOUT;
+}
+
+#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
+#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
+#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
+#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
+ VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
+
+static bool vce_v4_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset = 0;
+
+ /* According to VCE team , we should use VCE_STATUS instead
+ * SRBM_STATUS.VCE_BUSY bit for busy status checking.
+ * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
+ * instance's registers are accessed
+ * (0 for 1st instance, 10 for 2nd instance).
+ *
+ *VCE_STATUS
+ *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
+ *|----+----+-----------+----+----+----+----------+---------+----|
+ *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
+ *
+ * VCE team suggest use bit 3--bit 6 for busy status check
+ */
+ mutex_lock(&adev->grbm_idx_mutex);
+ WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
+ if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
+ }
+ WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
+ if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
+ }
+ WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ if (srbm_soft_reset) {
+ adev->vce.srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+ adev->vce.srbm_soft_reset = 0;
+ return false;
+ }
+}
+
+static int vce_v4_0_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+ srbm_soft_reset = adev->vce.srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int vce_v4_0_pre_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return vce_v4_0_suspend(adev);
+}
+
+
+static int vce_v4_0_post_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!adev->vce.srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+
+ return vce_v4_0_resume(adev);
+}
+
+static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
+{
+ u32 tmp, data;
+
+ tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
+ if (override)
+ data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
+ else
+ data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
+
+ if (tmp != data)
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
+}
+
+static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
+ bool gated)
+{
+ u32 data;
+
+ /* Set Override to disable Clock Gating */
+ vce_v4_0_override_vce_clock_gating(adev, true);
+
+ /* This function enables MGCG which is controlled by firmware.
+ With the clocks in the gated state the core is still
+ accessible but the firmware will throttle the clocks on the
+ fly as necessary.
+ */
+ if (gated) {
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
+ data |= 0x1ff;
+ data &= ~0xef0000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
+ data |= 0x3ff000;
+ data &= ~0xffc00000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
+ data |= 0x2;
+ data &= ~0x00010000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
+ data |= 0x37f;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
+ data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
+ 0x8;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
+ } else {
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
+ data &= ~0x80010;
+ data |= 0xe70008;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
+ data |= 0xffc00000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
+ data |= 0x10000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
+ data &= ~0xffc00000;
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
+
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
+ data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
+ VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
+ 0x8);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
+ }
+ vce_v4_0_override_vce_clock_gating(adev, false);
+}
+
+static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
+{
+ u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
+
+ if (enable)
+ tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+ else
+ tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+
+ WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
+}
+
+static int vce_v4_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ if ((adev->asic_type == CHIP_POLARIS10) ||
+ (adev->asic_type == CHIP_TONGA) ||
+ (adev->asic_type == CHIP_FIJI))
+ vce_v4_0_set_bypass_mode(adev, enable);
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
+ return 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < 2; i++) {
+ /* Program VCE Instance 0 or 1 if not harvested */
+ if (adev->vce.harvest_config & (1 << i))
+ continue;
+
+ WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
+
+ if (enable) {
+ /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
+ uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
+ data &= ~(0xf | 0xff0);
+ data |= ((0x0 << 0) | (0x04 << 4));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
+
+ /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
+ data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
+ data &= ~(0xf | 0xff0);
+ data |= ((0x0 << 0) | (0x04 << 4));
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
+ }
+
+ vce_v4_0_set_vce_sw_clock_gating(adev, enable);
+ }
+
+ WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+#endif
+
+static int vce_v4_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCE block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ return vce_v4_0_stop(adev);
+ else
+ return vce_v4_0_start(adev);
+}
+
+static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCE_CMD_IB_VM);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCE_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCE_CMD_TRAP);
+}
+
+static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCE_CMD_END);
+}
+
+static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
+ vmid * hub->ctx_addr_distance,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t val = 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (state == AMDGPU_IRQ_STATE_ENABLE)
+ val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
+
+ WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
+ ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
+ }
+ return 0;
+}
+
+static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCE\n");
+
+ switch (entry->src_data[0]) {
+ case 0:
+ case 1:
+ case 2:
+ amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+const struct amd_ip_funcs vce_v4_0_ip_funcs = {
+ .name = "vce_v4_0",
+ .early_init = vce_v4_0_early_init,
+ .late_init = NULL,
+ .sw_init = vce_v4_0_sw_init,
+ .sw_fini = vce_v4_0_sw_fini,
+ .hw_init = vce_v4_0_hw_init,
+ .hw_fini = vce_v4_0_hw_fini,
+ .suspend = vce_v4_0_suspend,
+ .resume = vce_v4_0_resume,
+ .is_idle = NULL /* vce_v4_0_is_idle */,
+ .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
+ .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
+ .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
+ .soft_reset = NULL /* vce_v4_0_soft_reset */,
+ .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
+ .set_clockgating_state = vce_v4_0_set_clockgating_state,
+ .set_powergating_state = vce_v4_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCE,
+ .align_mask = 0x3f,
+ .nop = VCE_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vce_v4_0_ring_get_rptr,
+ .get_wptr = vce_v4_0_ring_get_wptr,
+ .set_wptr = vce_v4_0_ring_set_wptr,
+ .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vce_v4_0_emit_vm_flush */
+ 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
+ 1, /* vce_v4_0_ring_insert_end */
+ .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
+ .emit_ib = vce_v4_0_ring_emit_ib,
+ .emit_vm_flush = vce_v4_0_emit_vm_flush,
+ .emit_fence = vce_v4_0_ring_emit_fence,
+ .test_ring = amdgpu_vce_ring_test_ring,
+ .test_ib = amdgpu_vce_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vce_v4_0_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vce_ring_begin_use,
+ .end_use = amdgpu_vce_ring_end_use,
+ .emit_wreg = vce_v4_0_emit_wreg,
+ .emit_reg_wait = vce_v4_0_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vce.num_rings; i++) {
+ adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
+ DRM_INFO("VCE enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
+ .set = vce_v4_0_set_interrupt_state,
+ .process = vce_v4_0_process_interrupt,
+};
+
+static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vce.irq.num_types = 1;
+ adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
+};
+
+const struct amdgpu_ip_block_version vce_v4_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCE,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vce_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h
new file mode 100644
index 000000000..a32beda6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCE_V4_0_H__
+#define __VCE_V4_0_H__
+
+extern const struct amdgpu_ip_block_version vce_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
new file mode 100644
index 000000000..2b9ddb3d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "vcn_sw_ring.h"
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
+}
+
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+}
+
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
new file mode 100644
index 000000000..7e775725f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_sw_ring.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_SW_RING_H__
+#define __VCN_SW_RING_H__
+
+#define VCN_SW_RING_EMIT_FRAME_SIZE \
+ (4 + /* vcn_dec_sw_ring_emit_vm_flush */ \
+ 5 + 5 + /* vcn_dec_sw_ring_emit_fence x2 vm fence */ \
+ 1) /* vcn_dec_sw_ring_insert_end */
+
+void vcn_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, uint32_t flags);
+void vcn_dec_sw_ring_insert_end(struct amdgpu_ring *ring);
+void vcn_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void vcn_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void vcn_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ uint32_t vmid, uint64_t pd_addr);
+void vcn_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+
+#endif /* __VCN_SW_RING_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
new file mode 100644
index 000000000..25ba27151
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -0,0 +1,2074 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_cs.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
+#include "mmhub/mmhub_9_1_offset.h"
+#include "mmhub/mmhub_9_1_sh_mask.h"
+
+#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+#include "jpeg_v1_0.h"
+#include "vcn_v1_0.h"
+
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1
+#define mmUVD_REG_XX_MASK_1_0 0x05ac
+#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
+
+static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v1_0_early_init - set function pointers and load microcode
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v1_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->vcn.num_enc_rings = 2;
+
+ vcn_v1_0_set_dec_ring_funcs(adev);
+ vcn_v1_0_set_enc_ring_funcs(adev);
+ vcn_v1_0_set_irq_funcs(adev);
+
+ jpeg_v1_0_early_init(handle);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v1_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v1_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ /* VCN ENC TRAP */
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
+ &adev->vcn.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ /* Override the work func */
+ adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst->ring_dec;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = adev->vcn.inst->external.nop =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
+
+ ring = &adev->vcn.inst->ring_enc[i];
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_enc%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+
+ if (amdgpu_vcnfw_log) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+
+ fw_shared->present_flag_0 = 0;
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+ }
+
+ r = jpeg_v1_0_sw_init(handle);
+
+ return r;
+}
+
+/**
+ * vcn_v1_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v1_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ jpeg_v1_0_sw_fini(handle);
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v1_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v1_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ int i, r;
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst->ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+
+ ring = adev->jpeg.inst->ring_dec;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v1_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v1_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
+ vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v1_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v1_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool idle_work_unexecuted;
+
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (idle_work_unexecuted) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ }
+
+ r = vcn_v1_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v1_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v1_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v1_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v1_0_mc_resume_spg_mode - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr));
+ offset = size;
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_JPEG_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+}
+
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
+ 0xFFFFFFFF, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
+ offset = size;
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
+ }
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
+
+ /* cache window 1: stack */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
+ 0xFFFFFFFF, 0);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
+ 0xFFFFFFFF, 0);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+}
+
+/**
+ * vcn_v1_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* JPEG disable CGC */
+ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
+
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__SCPU_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v1_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ /* enable JPEG CGC */
+ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
+
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+{
+ uint32_t reg_data = 0;
+
+ /* disable JPEG CGC */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
+}
+
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF);
+ }
+
+ /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF);
+ }
+}
+
+/**
+ * vcn_v1_0_start_spg_mode - start VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCN block
+ */
+static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ int i, j, r;
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+
+ vcn_1_0_disable_static_power_gating(adev);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
+
+ /* disable clock gating */
+ vcn_v1_0_disable_clock_gating(adev);
+
+ /* disable interupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* initialize VCN memory controller */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+#endif
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL, tmp);
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v1_0_mc_resume_spg_mode(adev);
+
+ WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
+ RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3);
+
+ /* enable VCPU clock */
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* boot up the VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, tmp);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+ if (status & UVD_STATUS__IDLE)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & UVD_STATUS__IDLE)
+ break;
+
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable system interrupt for JRBC, TODO: move to set interrupt*/
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SYS_INT_EN),
+ UVD_SYS_INT_EN__UVD_JRBC_EN_MASK,
+ ~UVD_SYS_INT_EN__UVD_JRBC_EN_MASK);
+
+ /* clear the busy bit of UVD_STATUS */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) & ~UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ ring = &adev->vcn.inst->ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vcn.inst->ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+
+ jpeg_v1_0_start(adev, 0);
+
+ return 0;
+}
+
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+
+ vcn_1_0_enable_static_power_gating(adev);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
+
+ /* enable clock gating */
+ vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
+
+ /* disable interupt */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
+ 0, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ /* initialize VCN memory controller */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ 0x00100000L, 0xFFFFFFFF, 0);
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+#endif
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_CNTL,
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
+
+ vcn_v1_0_mc_resume_dpg_mode(adev);
+
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
+
+ /* boot up the VCPU */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
+
+ /* enable UMC */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL2,
+ 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT,
+ 0xFFFFFFFF, 0);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
+ UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ /* setup mmUVD_LMI_CTRL */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ 0x00100000L, 0xFFFFFFFF, 1);
+
+ tmp = adev->gfx.config.gb_addr_config;
+ /* setup VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SYS_INT_EN,
+ UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ jpeg_v1_0_start(adev, 1);
+
+ return 0;
+}
+
+static int vcn_v1_0_start(struct amdgpu_device *adev)
+{
+ return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ?
+ vcn_v1_0_start_dpg_mode(adev) : vcn_v1_0_start_spg_mode(adev);
+}
+
+/**
+ * vcn_v1_0_stop_spg_mode - stop VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the VCN block
+ */
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
+{
+ int tmp;
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
+
+ /* stall UMC channel */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* reset LMI UMC/LMI */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ /* put VCPU into reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
+
+ vcn_v1_0_enable_clock_gating(adev);
+ vcn_1_0_enable_static_power_gating(adev);
+ return 0;
+}
+
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v1_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ r = vcn_v1_0_stop_dpg_mode(adev);
+ else
+ r = vcn_v1_0_stop_spg_mode(adev);
+
+ return r;
+}
+
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ int ret_code;
+ uint32_t reg_data = 0;
+ uint32_t reg_data2 = 0;
+ struct amdgpu_ring *ring;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG non-jpeg */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* Restore */
+ ring = &adev->vcn.inst->ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst->ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst->ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg non-jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based,
+ adev->vcn.inst[inst_idx].pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* Make sure JPRG Snoop is disabled before sending the pause */
+ reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+ /* pause DPG jpeg */
+ reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+ /* Restore */
+ ring = adev->jpeg.inst->ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+ ring = &adev->vcn.inst->ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg;
+ }
+
+ return 0;
+}
+
+static bool vcn_v1_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
+}
+
+static int vcn_v1_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+static int vcn_v1_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (!vcn_v1_0_is_idle(handle))
+ return -EBUSY;
+ vcn_v1_0_enable_clock_gating(adev);
+ } else {
+ /* disable HW gating and enable Sw gating */
+ vcn_v1_0_disable_clock_gating(adev);
+ }
+ return 0;
+}
+
+/**
+ * vcn_v1_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v1_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v1_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v1_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v1_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
+}
+
+static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
+}
+
+/**
+ * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0])
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+}
+
+ /**
+ * vcn_v1_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0])
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+}
+
+ /**
+ * vcn_v1_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0])
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ else
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+ lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write enc a fence and a trap command to the ring.
+ */
+static void vcn_v1_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
+}
+
+static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+}
+
+/**
+ * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrive vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
+ vmid * hub->ctx_addr_distance,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case 124:
+ amdgpu_fence_process(&adev->vcn.inst->ring_dec);
+ break;
+ case 119:
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
+ break;
+ case 120:
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static int vcn_v1_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCN block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v1_0_stop(adev);
+ else
+ ret = vcn_v1_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+ return ret;
+}
+
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+
+ fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
+
+ if (fences == 0) {
+ amdgpu_gfx_off_ctrl(adev, true);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ } else {
+ schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+
+ if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
+ DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
+
+ vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
+
+}
+
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (set_clocks) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+}
+
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+}
+
+static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
+ .name = "vcn_v1_0",
+ .early_init = vcn_v1_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v1_0_sw_init,
+ .sw_fini = vcn_v1_0_sw_fini,
+ .hw_init = vcn_v1_0_hw_init,
+ .hw_fini = vcn_v1_0_hw_fini,
+ .suspend = vcn_v1_0_suspend,
+ .resume = vcn_v1_0_resume,
+ .is_idle = vcn_v1_0_is_idle,
+ .wait_for_idle = vcn_v1_0_wait_for_idle,
+ .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
+ .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
+ .soft_reset = NULL /* vcn_v1_0_soft_reset */,
+ .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
+ .set_clockgating_state = vcn_v1_0_set_clockgating_state,
+ .set_powergating_state = vcn_v1_0_set_powergating_state,
+};
+
+/*
+ * It is a hardware issue that VCN can't handle a GTT TMZ buffer on
+ * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain
+ * before command submission as a workaround.
+ */
+static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE);
+ if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+ return -EINVAL;
+
+ bo = mapping->bo_va->base.bo;
+ if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED))
+ return 0;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ return r;
+}
+
+static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ uint32_t msg_lo = 0, msg_hi = 0;
+ int i, r;
+
+ if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE))
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+ uint32_t val = amdgpu_ib_get_value(ib, i + 1);
+
+ if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ msg_lo = val;
+ } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ msg_hi = val;
+ } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) {
+ r = vcn_v1_0_validate_bo(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .secure_submission_supported = true,
+ .get_rptr = vcn_v1_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v1_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v1_0_dec_ring_set_wptr,
+ .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v1_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v1_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v1_0_dec_ring_insert_nop,
+ .insert_start = vcn_v1_0_dec_ring_insert_start,
+ .insert_end = vcn_v1_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = vcn_v1_0_ring_begin_use,
+ .end_use = vcn_v1_0_ring_end_use,
+ .emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = vcn_v1_0_enc_ring_get_rptr,
+ .get_wptr = vcn_v1_0_enc_ring_get_wptr,
+ .set_wptr = vcn_v1_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v1_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v1_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v1_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v1_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v1_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v1_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = vcn_v1_0_ring_begin_use,
+ .end_use = vcn_v1_0_ring_end_use,
+ .emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
+ DRM_INFO("VCN decode is enabled in VM mode\n");
+}
+
+static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
+
+ DRM_INFO("VCN encode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
+ .set = vcn_v1_0_set_interrupt_state,
+ .process = vcn_v1_0_process_interrupt,
+};
+
+static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version vcn_v1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
new file mode 100644
index 000000000..1f1cc7f0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V1_0_H__
+#define __VCN_V1_0_H__
+
+void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring);
+void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks);
+
+extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
new file mode 100644
index 000000000..18794394c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -0,0 +1,2106 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_psp.h"
+#include "mmsch_v2_0.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x505
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x53f
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x54a
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x1e1
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
+
+static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
+/**
+ * vcn_v2_0_early_init - set function pointers and load microcode
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->vcn.num_enc_rings = 1;
+ else
+ adev->vcn.num_enc_rings = 2;
+
+ vcn_v2_0_set_dec_ring_funcs(adev);
+ vcn_v2_0_set_enc_ring_funcs(adev);
+ vcn_v2_0_set_irq_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v2_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
+ &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ /* VCN ENC TRAP */
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
+ &adev->vcn.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst->ring_dec;
+
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "vcn_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
+
+ ring = &adev->vcn.inst->ring_enc[i];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ if (!amdgpu_sriov_vf(adev))
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i;
+ sprintf(ring->name, "vcn_enc%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(adev->vcn.inst);
+
+ return 0;
+}
+
+/**
+ * vcn_v2_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v2_0_sw_fini(void *handle)
+{
+ int r, idx;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ fw_shared->present_flag_0 = 0;
+ drm_dev_exit(idx);
+ }
+
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v2_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ int i, r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, 0);
+
+ if (amdgpu_sriov_vf(adev))
+ vcn_v2_0_start_sriov(adev);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ //Disable vcn decode for sriov
+ if (amdgpu_sriov_vf(adev))
+ ring->sched.ready = false;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst->ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * vcn_v2_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v2_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v2_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr));
+ offset = size;
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+
+ WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v2_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__SCPU_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v2_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF);
+ }
+
+ /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS,
+ * UVDU_PWR_STATUS are 0 (power on) */
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF);
+ }
+}
+
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ uint32_t rb_bufsz, tmp;
+
+ vcn_v2_0_enable_static_power_gating(adev);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* release VCPU reset to boot */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_LMI_CTRL2),
+ 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, 0, 0);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ return 0;
+}
+
+static int vcn_v2_0_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ int i, j, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+
+ vcn_v2_0_disable_static_power_gating(adev);
+
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v2_0_disable_clock_gating(adev);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v2_0_mc_resume(adev);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET, tmp);
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+#endif
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst->ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst->ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ return 0;
+}
+
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+{
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ uint32_t tmp;
+
+ vcn_v2_0_pause_dpg_mode(adev, 0, &state);
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_0_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_0_stop_dpg_mode(adev);
+ if (r)
+ return r;
+ goto power_off;
+ }
+
+ /* wait for uvd idle */
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* reset LMI UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ /* reset LMI */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* clear status */
+ WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
+
+ vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_static_power_gating(adev);
+
+power_off:
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Restore */
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst->ring_enc[0];
+ ring->wptr = 0;
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst->ring_enc[1];
+ ring->wptr = 0;
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+static bool vcn_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
+}
+
+static int vcn_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+static int vcn_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (!vcn_v2_0_is_idle(handle))
+ return -EBUSY;
+ vcn_v2_0_enable_clock_gating(adev);
+ } else {
+ /* disable HW gating and enable Sw gating */
+ vcn_v2_0_disable_clock_gating(adev);
+ }
+ return 0;
+}
+
+/**
+ * vcn_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_nop - insert a nop command
+ *
+ * @ring: amdgpu_ring pointer
+ * @count: the number of NOP packets to insert
+ *
+ * Write a nop command to the ring.
+ */
+void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+/**
+ * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
+}
+
+/**
+ * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, reg << 2);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
+ amdgpu_ring_write(ring, mask);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
+}
+
+void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, reg << 2);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
+}
+
+/**
+ * vcn_v2_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0])
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+}
+
+ /**
+ * vcn_v2_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0]) {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ }
+}
+
+ /**
+ * vcn_v2_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst->ring_enc[0]) {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+/**
+ * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Write enc a fence and a trap command to the ring.
+ */
+void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
+}
+
+void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+}
+
+/**
+ * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrive vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
+ vmid * hub->ctx_addr_distance,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.inst->ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 4);
+ if (r)
+ return r;
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+
+static int vcn_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCN block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v2_0_stop(adev);
+ else
+ ret = vcn_v2_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+ return ret;
+}
+
+static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v2_0_init_header *header;
+ uint32_t size;
+ int i;
+
+ header = (struct mmsch_v2_0_init_header *)table->cpu_addr;
+ size = header->header_size + header->vcn_table_size;
+
+ /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+ data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ adev->vcn.inst->ring_dec.wptr = 0;
+ adev->vcn.inst->ring_dec.wptr_old = 0;
+ vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec);
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ adev->vcn.inst->ring_enc[i].wptr = 0;
+ adev->vcn.inst->ring_enc[i].wptr_old = 0;
+ vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]);
+ }
+
+ /* 5, kick off the initialization and wait until
+ * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop = 1000;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(10);
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ DRM_ERROR("failed to init MMSCH, " \
+ "mmMMSCH_VF_MAILBOX_RESP = 0x%08x\n", data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t tmp;
+ struct amdgpu_ring *ring;
+ uint32_t offset, size;
+ uint32_t table_size = 0;
+ struct mmsch_v2_0_cmd_direct_write direct_wt = { {0} };
+ struct mmsch_v2_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} };
+ struct mmsch_v2_0_cmd_end end = { {0} };
+ struct mmsch_v2_0_init_header *header;
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ uint8_t i = 0;
+
+ header = (struct mmsch_v2_0_init_header *)init_table;
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ if (header->vcn_table_offset == 0 && header->vcn_table_size == 0) {
+ header->version = MMSCH_VERSION;
+ header->header_size = sizeof(struct mmsch_v2_0_init_header) >> 2;
+
+ header->vcn_table_offset = header->header_size;
+
+ init_table += header->vcn_table_offset;
+
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ 0xFFFFFFFF, 0x00000004);
+
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo);
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi);
+ offset = 0;
+ } else {
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr));
+ offset = size;
+ }
+
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ 0);
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ size);
+
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst->gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst->gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ for (r = 0; r < adev->vcn.num_enc_rings; ++r) {
+ ring = &adev->vcn.inst->ring_enc[r];
+ ring->wptr = 0;
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+ }
+
+ ring = &adev->vcn.inst->ring_dec;
+ ring->wptr = 0;
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+ /* force RBC into idle state */
+ tmp = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V2_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ /* add end packet */
+ tmp = sizeof(struct mmsch_v2_0_cmd_end);
+ memcpy((void *)init_table, &end, tmp);
+ table_size += (tmp / 4);
+ header->vcn_table_size = table_size;
+
+ }
+ return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table);
+}
+
+static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
+ .name = "vcn_v2_0",
+ .early_init = vcn_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v2_0_sw_init,
+ .sw_fini = vcn_v2_0_sw_fini,
+ .hw_init = vcn_v2_0_hw_init,
+ .hw_fini = vcn_v2_0_hw_fini,
+ .suspend = vcn_v2_0_suspend,
+ .resume = vcn_v2_0_resume,
+ .is_idle = vcn_v2_0_is_idle,
+ .wait_for_idle = vcn_v2_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v2_0_set_clockgating_state,
+ .set_powergating_state = vcn_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .secure_submission_supported = true,
+ .get_rptr = vcn_v2_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v2_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v2_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = vcn_v2_0_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v2_0_enc_ring_get_rptr,
+ .get_wptr = vcn_v2_0_enc_ring_get_wptr,
+ .set_wptr = vcn_v2_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
+ DRM_INFO("VCN decode is enabled in VM mode\n");
+}
+
+static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
+
+ DRM_INFO("VCN encode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
+ .set = vcn_v2_0_set_interrupt_state,
+ .process = vcn_v2_0_process_interrupt,
+};
+
+static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version vcn_v2_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
new file mode 100644
index 000000000..6c9de1882
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V2_0_H__
+#define __VCN_V2_0_H__
+
+extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr);
+extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val);
+extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring);
+
+extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring);
+extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags);
+extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+
+extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;
+
+#endif /* __VCN_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
new file mode 100644
index 000000000..6fbea38f4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -0,0 +1,1995 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v1_0.h"
+#include "vcn_v2_5.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
+
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * vcn_v2_5_early_init - set function pointers and load microcode
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v2_5_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.num_vcn_inst = 2;
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_enc_rings = 1;
+ } else {
+ u32 harvest;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->vcn.harvest_config |= 1 << i;
+ }
+ if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+ AMDGPU_VCN_HARVEST_VCN1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+
+ adev->vcn.num_enc_rings = 2;
+ }
+
+ vcn_v2_5_set_dec_ring_funcs(adev);
+ vcn_v2_5_set_enc_ring_funcs(adev);
+ vcn_v2_5_set_irq_funcs(adev);
+ vcn_v2_5_set_ras_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v2_5_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v2_5_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
+ if (r)
+ return r;
+
+ /* VCN ENC TRAP */
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
+ if (r)
+ return r;
+ }
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+ VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
+
+ ring = &adev->vcn.inst[j].ring_dec;
+ ring->use_doorbell = true;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? 2*j : 8*j);
+
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "vcn_dec_%d", j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
+
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ ring->use_doorbell = true;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ ring->vm_hub = AMDGPU_MMHUB1(0);
+ else
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "vcn_enc_%d.%d", j, i);
+ r = amdgpu_ring_init(adev, ring, 512,
+ &adev->vcn.inst[j].irq, 0,
+ hw_prio, NULL);
+ if (r)
+ return r;
+ }
+
+ fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v2_5_sw_fini(void *handle)
+{
+ int i, r, idx;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ }
+ drm_dev_exit(idx);
+ }
+
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v2_5_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ r = vcn_v2_5_sriov_start(adev);
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.inst[j].ring_enc[0].sched.ready = true;
+ adev->vcn.inst[j].ring_enc[1].sched.ready = false;
+ adev->vcn.inst[j].ring_enc[2].sched.ready = false;
+ adev->vcn.inst[j].ring_dec.sched.ready = true;
+ } else {
+
+ ring = &adev->vcn.inst[j].ring_dec;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, j);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v2_5_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS)))
+ vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v2_5_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v2_5_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v2_5_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v2_5_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_5_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+ }
+}
+
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v2_5_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
+ }
+}
+
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v2_5_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
+ }
+}
+
+static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
+ bool indirect)
+{
+ uint32_t tmp;
+
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
+
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ vcn_v2_6_enable_ras(adev, inst_idx, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* set the write pointer delay */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ int i, j, k, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return 0;
+
+ /*SW clock gating */
+ vcn_v2_5_disable_clock_gating(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ tmp &= ~0xff;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+ }
+
+ vcn_v2_5_mc_resume(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (k = 0; k < 10; ++k) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(500);
+ else
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop = 0, size = 0;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_1_init_header *header = NULL;
+
+ header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+ size = header->total_size;
+
+ /*
+ * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+ * memory descriptor location
+ */
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
+ data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ /*
+ * 5, kick off the initialization and wait until
+ * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop = 10;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(100);
+ data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev,
+ "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+ data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size, tmp, i, rb_bufsz;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_end end = { { 0 } };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ header->version = MMSCH_VERSION;
+ header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+ init_table += header->total_size;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ header->eng[i].table_offset = header->total_size;
+ header->eng[i].init_status = 0;
+ header->eng[i].table_size = 0;
+
+ table_size = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
+ size);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+ /* refine header */
+ header->eng[i].table_size = table_size;
+ header->total_size += table_size;
+ }
+
+ return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+ vcn_v2_5_enable_clock_gating(adev);
+
+ /* enable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code = 0;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ /* Restore */
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .secure_submission_supported = true,
+ .get_rptr = vcn_v2_5_dec_ring_get_rptr,
+ .get_wptr = vcn_v2_5_dec_ring_get_wptr,
+ .set_wptr = vcn_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = vcn_v2_0_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
+}
+
+/**
+ * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
+ }
+}
+
+/**
+ * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v2_5_enc_ring_get_rptr,
+ .get_wptr = vcn_v2_5_enc_ring_get_wptr,
+ .set_wptr = vcn_v2_5_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
+ adev->vcn.inst[i].ring_dec.me = i;
+ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
+ }
+}
+
+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+ if (adev->vcn.harvest_config & (1 << j))
+ continue;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
+ adev->vcn.inst[j].ring_enc[i].me = j;
+ }
+ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
+ }
+}
+
+static bool vcn_v2_5_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+static int vcn_v2_5_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int vcn_v2_5_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (enable) {
+ if (!vcn_v2_5_is_idle(handle))
+ return -EBUSY;
+ vcn_v2_5_enable_clock_gating(adev);
+ } else {
+ vcn_v2_5_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if(state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v2_5_stop(adev);
+ else
+ ret = vcn_v2_5_start(adev);
+
+ if(!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
+ .set = vcn_v2_5_set_interrupt_state,
+ .process = vcn_v2_5_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = {
+ .set = vcn_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
+
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
+ .name = "vcn_v2_5",
+ .early_init = vcn_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v2_5_sw_init,
+ .sw_fini = vcn_v2_5_sw_fini,
+ .hw_init = vcn_v2_5_hw_init,
+ .hw_fini = vcn_v2_5_hw_fini,
+ .suspend = vcn_v2_5_suspend,
+ .resume = vcn_v2_5_resume,
+ .is_idle = vcn_v2_5_is_idle,
+ .wait_for_idle = vcn_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v2_5_set_clockgating_state,
+ .set_powergating_state = vcn_v2_5_set_powergating_state,
+};
+
+static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
+ .name = "vcn_v2_6",
+ .early_init = vcn_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v2_5_sw_init,
+ .sw_fini = vcn_v2_5_sw_fini,
+ .hw_init = vcn_v2_5_hw_init,
+ .hw_fini = vcn_v2_5_hw_fini,
+ .suspend = vcn_v2_5_suspend,
+ .resume = vcn_v2_5_resume,
+ .is_idle = vcn_v2_5_is_idle,
+ .wait_for_idle = vcn_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v2_5_set_clockgating_state,
+ .set_powergating_state = vcn_v2_5_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 2,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &vcn_v2_5_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 2,
+ .minor = 6,
+ .rev = 0,
+ .funcs = &vcn_v2_6_ip_funcs,
+};
+
+static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V2_6_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
+ .query_poison_status = vcn_v2_6_query_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
+ },
+};
+
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[VCN_HWIP][0]) {
+ case IP_VERSION(2, 6, 0):
+ adev->vcn.ras = &vcn_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
new file mode 100644
index 000000000..1c19af74e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V2_5_H__
+#define __VCN_V2_5_H__
+
+enum amdgpu_vcn_v2_6_sub_block {
+ AMDGPU_VCN_V2_6_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V2_6_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;
+extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block;
+
+#endif /* __VCN_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
new file mode 100644
index 000000000..a61ecefda
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -0,0 +1,2237 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v3_0.h"
+#include "vcn_sw_ring.h"
+
+#include "vcn/vcn_3_0_0_offset.h"
+#include "vcn/vcn_3_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#include <drm/drm_drv.h>
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
+
+#define VCN_INSTANCES_SIENNA_CICHLID 2
+#define DEC_SW_RING_ENABLED FALSE
+
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
+static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+
+static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v3_0_early_init - set function pointers and load microcode
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_enc_rings = 1;
+
+ } else {
+ if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+ AMDGPU_VCN_HARVEST_VCN1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33))
+ adev->vcn.num_enc_rings = 0;
+ else
+ adev->vcn.num_enc_rings = 2;
+ }
+
+ vcn_v3_0_set_dec_ring_funcs(adev);
+ vcn_v3_0_set_enc_ring_funcs(adev);
+ vcn_v3_0_set_irq_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v3_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, j, r;
+ int vcn_doorbell_index = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
+ * Formula:
+ * vcn_db_base = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+ * dec_ring_i = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
+ * enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
+ /* get DWORD offset */
+ vcn_doorbell_index = vcn_doorbell_index << 1;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
+ } else {
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
+ }
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT,
+ &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
+
+ /* VCN ENC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
+ } else {
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
+ }
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_enc_%d.%d", i, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ hw_prio, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+ }
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
+ cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
+ cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
+ fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
+ fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
+ if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2))
+ fw_shared->smu_interface_info.smu_interface_type = 2;
+ else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1))
+ fw_shared->smu_interface_info.smu_interface_type = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v3_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sw_ring.is_enabled = false;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v3_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v3_0_start_sriov(adev);
+ if (r)
+ goto done;
+
+ /* initialize VCN dec and enc ring buffers */
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
+ ring->sched.ready = false;
+ ring->no_scheduler = true;
+ dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
+ } else {
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v3_0_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
+ ring->sched.ready = false;
+ ring->no_scheduler = true;
+ dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
+ } else {
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v3_0_enc_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ }
+ } else {
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_dec;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v3_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
+ vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v3_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v3_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v3_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+}
+
+static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0, 0x3F3FFFFF);
+ }
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
+ }
+}
+
+/**
+ * vcn_v3_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__IME_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__EFC_MASK
+ | UVD_SUVD_CGC_GATE__SAOE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_AV1_MASK
+ | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
+ | UVD_SUVD_CGC_GATE__SCM_AV1_MASK
+ | UVD_SUVD_CGC_GATE__SMPA_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
+ data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
+ | UVD_SUVD_CGC_GATE2__MPBE1_MASK
+ | UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
+ | UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
+ | UVD_SUVD_CGC_GATE2__MPC1_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v3_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ /* add nop to workaround PSP size check */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ /* Reset FW shared memory RBC WPTR/RPTR */
+ fw_shared->rb.rptr = 0;
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+
+ /*resetting done, fw can check RB ring */
+ fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ return 0;
+}
+
+static int vcn_v3_0_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ int i, j, k, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ /* disable VCN power gating */
+ vcn_v3_0_disable_static_power_gating(adev, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v3_0_disable_clock_gating(adev, i);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v3_0_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+ fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
+{
+ int i, j;
+ struct amdgpu_ring *ring;
+ uint64_t cache_addr;
+ uint64_t rb_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+
+ struct mmsch_v3_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v3_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v3_0_cmd_end end = { {0} };
+ struct mmsch_v3_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
+ for (i = 0; i < MMSCH_V3_0_VCN_INSTANCES; i++) {
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = 0;
+ header.inst[i].table_size = 0;
+ }
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ table_size = 0;
+
+ MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_OFFSET0),
+ 0);
+ } else {
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ ring->wptr = 0;
+ rb_addr = ring->gpu_addr;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_RB_BASE_LO),
+ lower_32_bits(rb_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_RB_BASE_HI),
+ upper_32_bits(rb_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+ }
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->wptr = 0;
+ rb_addr = ring->gpu_addr;
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(rb_addr));
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(rb_addr));
+ /* force RBC into idle state */
+ tmp = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ mmUVD_RBC_RB_CNTL),
+ tmp);
+
+ /* add end packet */
+ MMSCH_V3_0_INSERT_END();
+
+ /* refine header */
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = header.total_size;
+ header.inst[i].table_size = table_size;
+ header.total_size += table_size;
+ }
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v3_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x10000001;
+ WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = param + 1;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
+ if (resp == expected)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for mmMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ uint32_t tmp;
+
+ vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v3_0_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v3_0_enable_clock_gating(adev, i);
+
+ /* enable VCN power gating */
+ vcn_v3_0_enable_static_power_gating(adev, i);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* Stall DPG before WPTR/RPTR reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+ ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
+ /* Restore */
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ ring->wptr = 0;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ /* restore wptr/rptr with pointers saved in FW shared memory*/
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr);
+ }
+
+ /* Unstall DPG */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+ 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v3_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v3_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ volatile struct amdgpu_fw_shared *fw_shared;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ /*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */
+ fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+ WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr));
+ }
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0x3f,
+ .nop = VCN_DEC_SW_CMD_NO_OP,
+ .secure_submission_supported = true,
+ .get_rptr = vcn_v3_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v3_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v3_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ VCN_SW_RING_EMIT_FRAME_SIZE,
+ .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
+ .emit_ib = vcn_dec_sw_ring_emit_ib,
+ .emit_fence = vcn_dec_sw_ring_emit_fence,
+ .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
+ .test_ib = NULL,//amdgpu_vcn_dec_sw_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_dec_sw_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_dec_sw_ring_emit_wreg,
+ .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
+{
+ struct drm_gpu_scheduler **scheds;
+
+ /* The create msg must be in the first IB submitted */
+ if (atomic_read(&job->base.entity->fence_seq))
+ return -EINVAL;
+
+ /* if VCN0 is harvested, we can't support AV1 */
+ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
+ return -EINVAL;
+
+ scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
+ [AMDGPU_RING_PRIO_DEFAULT].sched;
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
+ return 0;
+}
+
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *map;
+ uint32_t *msg, num_buffers;
+ struct amdgpu_bo *bo;
+ uint64_t start, end;
+ unsigned int i;
+ void *ptr;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
+ return r;
+ }
+
+ start = map->start * AMDGPU_GPU_PAGE_SIZE;
+ end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+ return r;
+ }
+
+ msg = ptr + addr - start;
+
+ /* Check length */
+ if (msg[1] > end - addr) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (msg[3] != RDECODE_MSG_CREATE)
+ goto out;
+
+ num_buffers = msg[2];
+ for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+ uint32_t offset, size, *create;
+
+ if (msg[0] != RDECODE_MESSAGE_CREATE)
+ continue;
+
+ offset = msg[1];
+ size = msg[2];
+
+ if (offset + size > end) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ create = ptr + addr + offset - start;
+
+ /* H246, HEVC and VP9 can run on any instance */
+ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+ continue;
+
+ r = vcn_v3_0_limit_sched(p, job);
+ if (r)
+ goto out;
+ }
+
+out:
+ amdgpu_bo_kunmap(bo);
+ return r;
+}
+
+static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ uint32_t msg_lo = 0, msg_hi = 0;
+ unsigned i;
+ int r;
+
+ /* The first instance can decode anything */
+ if (!ring->me)
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_ib_get_value(ib, i);
+ uint32_t val = amdgpu_ib_get_value(ib, i + 1);
+
+ if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ msg_lo = val;
+ } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ msg_hi = val;
+ } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
+ val == 0) {
+ r = vcn_v3_0_dec_msg(p, job,
+ ((u64)msg_hi) << 32 | msg_lo);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .secure_submission_supported = true,
+ .get_rptr = vcn_v3_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v3_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v3_0_dec_ring_set_wptr,
+ .patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = vcn_v2_0_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
+}
+
+/**
+ * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
+ }
+}
+
+/**
+ * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v3_0_enc_ring_get_rptr,
+ .get_wptr = vcn_v3_0_enc_ring_get_wptr,
+ .set_wptr = vcn_v3_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (!DEC_SW_RING_ENABLED)
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+ else
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
+ adev->vcn.inst[i].ring_dec.me = i;
+ DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
+ DEC_SW_RING_ENABLED?"(Software Ring)":"");
+ }
+}
+
+static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[j].me = i;
+ }
+ if (adev->vcn.num_enc_rings > 0)
+ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
+ }
+}
+
+static bool vcn_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+static int vcn_v3_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int vcn_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v3_0_enable_clock_gating(adev, i);
+ } else {
+ vcn_v3_0_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v3_0_stop(adev);
+ else
+ ret = vcn_v3_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
+ .set = vcn_v3_0_set_interrupt_state,
+ .process = vcn_v3_0_process_interrupt,
+};
+
+static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
+ .name = "vcn_v3_0",
+ .early_init = vcn_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v3_0_sw_init,
+ .sw_fini = vcn_v3_0_sw_fini,
+ .hw_init = vcn_v3_0_hw_init,
+ .hw_fini = vcn_v3_0_hw_fini,
+ .suspend = vcn_v3_0_suspend,
+ .resume = vcn_v3_0_resume,
+ .is_idle = vcn_v3_0_is_idle,
+ .wait_for_idle = vcn_v3_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v3_0_set_clockgating_state,
+ .set_powergating_state = vcn_v3_0_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h
new file mode 100644
index 000000000..31683582d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V3_0_H__
+#define __VCN_V3_0_H__
+
+extern const struct amdgpu_ip_block_version vcn_v3_0_ip_block;
+
+#endif /* __VCN_V3_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
new file mode 100644
index 000000000..29164289c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -0,0 +1,2166 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v4_0.h"
+#include "vcn_v4_0.h"
+
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#include <drm/drm_drv.h>
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+
+#define VCN_HARVEST_MMSCH 0
+
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
+
+/**
+ * vcn_v4_0_early_init - set function pointers and load microcode
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v4_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
+ adev->vcn.harvest_config |= 1 << i;
+ dev_info(adev->dev, "VCN%d is disabled by hypervisor\n", i);
+ }
+ }
+ }
+
+ /* re-use enc ring as unified ring */
+ adev->vcn.num_enc_rings = 1;
+
+ vcn_v4_0_set_unified_ring_funcs(adev);
+ vcn_v4_0_set_irq_funcs(adev);
+ vcn_v4_0_set_ras_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v4_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v4_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ /* Init instance 0 sched_score to 1, so it's scheduled after other instances */
+ if (i == 0)
+ atomic_set(&adev->vcn.inst[i].sched_score, 1);
+ else
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].ras_poison_irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev))
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_unified_%d", i);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 2)) {
+ fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
+ fw_shared->drm_key_wa.method =
+ AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
+
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v4_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v4_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v4_0_start_sriov(adev);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v4_0_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
+
+ }
+ } else {
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ }
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v4_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
+ }
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v4_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v4_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v4_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v4_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
+{
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know it's offsets with dpg mode
+ */
+static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_disable_static_power_gating - disable VCN static power gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Disable static power gating for VCN block
+ */
+static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
+ } else {
+ uint32_t value;
+
+ value = (inst) ? 0x2200800 : 0;
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, value, 0x3F3FFFFF);
+ }
+
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ return;
+}
+
+/**
+ * vcn_v4_0_enable_static_power_gating - enable VCN static power gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Enable static power gating for VCN block
+ */
+static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDS_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, regUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDS_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
+ }
+
+ return;
+}
+
+/**
+ * vcn_v4_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @sram_sel: sram select
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode
+ */
+static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+
+ return;
+}
+
+static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
+ bool indirect)
+{
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ vcn_v4_0_enable_ras(adev, inst_idx, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ return 0;
+}
+
+
+/**
+ * vcn_v4_0_start - VCN start
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Start VCN block
+ */
+static int vcn_v4_0_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int i, j, k, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ /* disable VCN power gating */
+ vcn_v4_0_disable_static_power_gating(adev, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_disable_clock_gating(adev, i);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+ }
+
+ return 0;
+}
+
+static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_init_header header;
+
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
+ for (i = 0; i < MMSCH_V4_0_VCN_INSTANCES; i++) {
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = 0;
+ header.inst[i].table_size = 0;
+ }
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[i].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+
+ /* add end packet */
+ MMSCH_V4_0_INSERT_END();
+
+ /* refine header */
+ header.inst[i].init_status = 0;
+ header.inst[i].table_offset = header.total_size;
+ header.inst[i].table_size = table_size;
+ header.total_size += table_size;
+ }
+
+ /* Update init table header in memory */
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ /* message MMSCH (in VCN[0]) to initialize this client
+ * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+ * of memory descriptor location
+ */
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* 2, update vmid of descriptor */
+ tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ size = header.total_size;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ /* 5, kick off the initialization and wait until
+ * MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ param = 0x00000001;
+ WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop VCN block with dpg mode
+ */
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ uint32_t tmp;
+
+ vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state);
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+}
+
+/**
+ * vcn_v4_0_stop - VCN stop
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop VCN block
+ */
+static int vcn_v4_0_stop(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ uint32_t tmp;
+ int i, r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_enable_clock_gating(adev, i);
+
+ /* enable VCN power gating */
+ vcn_v4_0_enable_static_power_gating(adev, i);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
+ struct dpg_pause_state *new_state)
+{
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
+{
+ struct drm_gpu_scheduler **scheds;
+
+ /* The create msg must be in the first IB submitted */
+ if (atomic_read(&job->base.entity->fence_seq))
+ return -EINVAL;
+
+ /* if VCN0 is harvested, we can't support AV1 */
+ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
+ return -EINVAL;
+
+ scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
+ [AMDGPU_RING_PRIO_0].sched;
+ drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
+ return 0;
+}
+
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ uint64_t addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *map;
+ uint32_t *msg, num_buffers;
+ struct amdgpu_bo *bo;
+ uint64_t start, end;
+ unsigned int i;
+ void *ptr;
+ int r;
+
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ start = map->start * AMDGPU_GPU_PAGE_SIZE;
+ end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+ return r;
+ }
+
+ msg = ptr + addr - start;
+
+ /* Check length */
+ if (msg[1] > end - addr) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (msg[3] != RDECODE_MSG_CREATE)
+ goto out;
+
+ num_buffers = msg[2];
+ for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+ uint32_t offset, size, *create;
+
+ if (msg[0] != RDECODE_MESSAGE_CREATE)
+ continue;
+
+ offset = msg[1];
+ size = msg[2];
+
+ if (offset + size > end) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ create = ptr + addr + offset - start;
+
+ /* H264, HEVC and VP9 can run on any instance */
+ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+ continue;
+
+ r = vcn_v4_0_limit_sched(p, job);
+ if (r)
+ goto out;
+ }
+
+out:
+ amdgpu_bo_kunmap(bo);
+ return r;
+}
+
+#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
+#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
+
+#define RADEON_VCN_ENGINE_INFO (0x30000001)
+#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
+
+#define RENCODE_ENCODE_STANDARD_AV1 2
+#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
+#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
+
+/* return the offset in ib if id is found, -1 otherwise
+ * to speed up the searching we only search upto max_offset
+ */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+{
+ int i;
+
+ for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
+ if (ib->ptr[i + 1] == id)
+ return i;
+ }
+ return -1;
+}
+
+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ struct amdgpu_vcn_decode_buffer *decode_buffer;
+ uint64_t addr;
+ uint32_t val;
+ int idx;
+
+ /* The first instance can decode anything */
+ if (!ring->me)
+ return 0;
+
+ /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
+ idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+ RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
+ if (idx < 0) /* engine info is missing */
+ return 0;
+
+ val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
+
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
+ } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
+ idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
+ RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+ if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
+ return vcn_v4_0_limit_sched(p, job);
+ }
+ return 0;
+}
+
+static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_unified_ring_set_wptr,
+ .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 2))
+ vcn_v4_0_unified_ring_vm_funcs.secure_submission_supported = true;
+
+ adev->vcn.inst[i].ring_enc[0].funcs =
+ (const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+
+ DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
+ }
+}
+
+/**
+ * vcn_v4_0_is_idle - check VCN block is idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v4_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_wait_for_idle - wait for VCN block idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v4_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_set_clockgating_state - set VCN block clockgating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v4_0_enable_clock_gating(adev, i);
+ } else {
+ vcn_v4_0_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_set_powergating_state - set VCN block powergating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v4_0_stop(adev);
+ else
+ ret = vcn_v4_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block interrupt state
+ */
+static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ unsigned type, enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block RAS interrupt state
+ */
+static int vcn_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
+ .set = vcn_v4_0_set_interrupt_state,
+ .process = vcn_v4_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_ras_irq_funcs = {
+ .set = vcn_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+/**
+ * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
+
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
+ .name = "vcn_v4_0",
+ .early_init = vcn_v4_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v4_0_sw_init,
+ .sw_fini = vcn_v4_0_sw_fini,
+ .hw_init = vcn_v4_0_hw_init,
+ .hw_fini = vcn_v4_0_hw_fini,
+ .suspend = vcn_v4_0_suspend,
+ .resume = vcn_v4_0_resume,
+ .is_idle = vcn_v4_0_is_idle,
+ .wait_for_idle = vcn_v4_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v4_0_set_clockgating_state,
+ .set_powergating_state = vcn_v4_0_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v4_0_ip_funcs,
+};
+
+static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V4_0_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v4_0_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
+ .query_poison_status = vcn_v4_0_query_ras_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v4_0_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
+ },
+};
+
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[VCN_HWIP][0]) {
+ case IP_VERSION(4, 0, 0):
+ adev->vcn.ras = &vcn_v4_0_ras;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
new file mode 100644
index 000000000..7d3d11f40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_H__
+#define __VCN_V4_0_H__
+
+enum amdgpu_vcn_v4_0_sub_block {
+ AMDGPU_VCN_V4_0_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_ip_block;
+
+#endif /* __VCN_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
new file mode 100644
index 000000000..f85d18cd7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -0,0 +1,1767 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
+
+#include "vcn/vcn_4_0_3_offset.h"
+#include "vcn/vcn_4_0_3_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect);
+/**
+ * vcn_v4_0_3_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v4_0_3_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* re-use enc ring as unified ring */
+ adev->vcn.num_enc_rings = 1;
+
+ vcn_v4_0_3_set_unified_ring_funcs(adev);
+ vcn_v4_0_3_set_irq_funcs(adev);
+ vcn_v4_0_3_set_ras_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v4_0_3_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v4_0_3_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+
+ if (!amdgpu_sriov_vf(adev))
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 32 * vcn_inst;
+
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT,
+ &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = true;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ r = amdgpu_vcn_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v4_0_3_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r, idx;
+
+ if (drm_dev_enter(&adev->ddev, &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = cpu_to_le32(false);
+ }
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v4_0_3_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v4_0_3_start_sriov(adev);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v4_0_3_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ } else {
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(
+ adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst,
+ adev->vcn.inst[i].aid_id);
+
+ WREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL,
+ ring->doorbell_index
+ << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(
+ VCN, GET_INST(VCN, ring->me),
+ regVCN_RB1_DB_CTRL);
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+
+done:
+ if (!r)
+ DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v4_0_3_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
+ vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v4_0_3_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = vcn_v4_0_3_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v4_0_3_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v4_0_3_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_3_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_lo));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
+ .tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
+ AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(
+ VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know it's offsets with dpg mode
+ */
+static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t data;
+ int vcn_inst;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* VCN disable CGC */
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @sram_sel: sram select
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Disable clock gating for VCN block with dpg mode
+ */
+static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t data;
+ int vcn_inst;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ int vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
+ inst_idx, adev->vcn.inst[inst_idx].aid_id);
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable clock gating */
+ vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /*resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ return 0;
+}
+
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v4_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v4_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v4_0_cmd_end end = { {0} };
+ struct mmsch_v4_0_3_init_header header;
+
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V4_0_INSERT_END();
+
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v4_0_3_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_start - VCN start
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Start VCN block
+ */
+static int vcn_v4_0_3_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ int i, j, k, r, vcn_inst;
+ uint32_t tmp;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ vcn_inst = GET_INST(VCN, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
+ UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_3_disable_clock_gating(adev, i);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
+ tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_3_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst,
+ regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_DEV_ERROR(adev->dev,
+ "VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
+
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ fw_shared->sq.queue_mode &=
+ cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
+
+ }
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop VCN block with dpg mode
+ */
+static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t tmp;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_stop - VCN stop
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop VCN block
+ */
+static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ int i, r = 0, vcn_inst;
+ uint32_t tmp;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_3_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
+ UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto Done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* Unblock VCPU Register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* reset LMI UMC/LMI/VCPU */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear VCN status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_3_enable_clock_gating(adev, i);
+ }
+Done:
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
+ struct dpg_pause_state *new_state)
+{
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
+ regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id =
+ vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+ DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
+}
+
+/**
+ * vcn_v4_0_3_is_idle - check VCN block is idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v4_0_3_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
+ UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v4_0_3_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
+ UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v4_0_3_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i),
+ regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v4_0_3_enable_clock_gating(adev, i);
+ } else {
+ vcn_v4_0_3_disable_clock_gating(adev, i);
+ }
+ }
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
+ *
+ * @handle: amdgpu_device pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+static int vcn_v4_0_3_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v4_0_3_stop(adev);
+ else
+ ret = vcn_v4_0_3_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block interrupt state
+ */
+static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
+ * vcn_v4_0_3_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
+ .set = vcn_v4_0_3_set_interrupt_state,
+ .process = vcn_v4_0_3_process_interrupt,
+};
+
+/**
+ * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst->irq.num_types++;
+ }
+ adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
+ .name = "vcn_v4_0_3",
+ .early_init = vcn_v4_0_3_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v4_0_3_sw_init,
+ .sw_fini = vcn_v4_0_3_sw_fini,
+ .hw_init = vcn_v4_0_3_hw_init,
+ .hw_fini = vcn_v4_0_3_hw_fini,
+ .suspend = vcn_v4_0_3_suspend,
+ .resume = vcn_v4_0_3_resume,
+ .is_idle = vcn_v4_0_3_is_idle,
+ .wait_for_idle = vcn_v4_0_3_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
+ .set_powergating_state = vcn_v4_0_3_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 4,
+ .minor = 0,
+ .rev = 3,
+ .funcs = &vcn_v4_0_3_ip_funcs,
+};
+
+static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
+ {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
+};
+
+static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ /* vcn v4_0_3 only support query uncorrectable errors */
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ NULL, 0, GET_INST(VCN, vcn_inst),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t vcn_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ vcn_v4_0_3_ue_reg_list,
+ ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
+ GET_INST(VCN, vcn_inst));
+}
+
+static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+ dev_warn(adev->dev, "VCN RAS is not supported\n");
+ return;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
+ .query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
+ .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+};
+
+static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ },
+};
+
+static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ras = &vcn_v4_0_3_ras;
+}
+
+static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
+ int inst_idx, bool indirect)
+{
+ uint32_t tmp;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ return;
+
+ tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+ VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+ tmp, 0, indirect);
+
+ tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+ tmp, 0, indirect);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
new file mode 100644
index 000000000..0b0461143
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_3_H__
+#define __VCN_V4_0_3_H__
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block;
+
+#endif /* __VCN_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
new file mode 100644
index 000000000..d364c6dd1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "soc15.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "vega10_ih.h"
+
+#define MAX_REARM_RETRY 10
+
+static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * vega10_ih_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (VEGA10).
+ */
+static void vega10_ih_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+
+ if (adev->irq.ih2.ring_size) {
+ ih_regs = &adev->irq.ih2.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2;
+ }
+}
+
+/**
+ * vega10_ih_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointet
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (VEGA10)
+ */
+static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+ if (amdgpu_sriov_vf(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * vega10_ih_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (VEGA10).
+ */
+static int vega10_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = vega10_ih_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 1 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * vega10_ih_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (VEGA10)
+ */
+static int vega10_ih_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = vega10_ih_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, vega10_ih_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * vega10_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int vega10_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ u32 ih_chicken;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = vega10_ih_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (adev->asic_type == CHIP_RENOIR) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = vega10_ih_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = vega10_ih_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * vega10_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VEGA10).
+ */
+static void vega10_ih_irq_disable(struct amdgpu_device *adev)
+{
+ vega10_ih_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * vega10_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VEGA10). Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
+ /* Only ring0 supports writeback. On other rings fall back
+ * to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
+ */
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ }
+
+ ih_regs = &ih->ih_regs;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catchup.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * vega10_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
+ *
+ */
+static void vega10_ih_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+ /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * vega10_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void vega10_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih_soft)
+ return;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ vega10_ih_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * vega10_ih_self_irq - dispatch work for ring 1 and 2
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int vega10_ih_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ switch (entry->ring_id) {
+ case 1:
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ case 2:
+ schedule_work(&adev->irq.ih2_work);
+ break;
+ default: break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vega10_ih_self_irq_funcs = {
+ .process = vega10_ih_self_irq,
+};
+
+static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs;
+}
+
+static int vega10_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega10_ih_set_interrupt_funcs(adev);
+ vega10_ih_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int vega10_ih_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih2.use_doorbell = true;
+ adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+ }
+ /* initialize ih control registers offset */
+ vega10_ih_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int vega10_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int vega10_ih_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vega10_ih_irq_init(adev);
+}
+
+static int vega10_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega10_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int vega10_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vega10_ih_hw_fini(adev);
+}
+
+static int vega10_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vega10_ih_hw_init(adev);
+}
+
+static bool vega10_ih_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int vega10_ih_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int vega10_ih_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ /**
+ * Vega10/12 and RAVEN don't have IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field.
+ */
+ if (adev->asic_type == CHIP_RENOIR)
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val);
+
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+}
+
+static int vega10_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega10_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+
+}
+
+static int vega10_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs vega10_ih_ip_funcs = {
+ .name = "vega10_ih",
+ .early_init = vega10_ih_early_init,
+ .late_init = NULL,
+ .sw_init = vega10_ih_sw_init,
+ .sw_fini = vega10_ih_sw_fini,
+ .hw_init = vega10_ih_hw_init,
+ .hw_fini = vega10_ih_hw_fini,
+ .suspend = vega10_ih_suspend,
+ .resume = vega10_ih_resume,
+ .is_idle = vega10_ih_is_idle,
+ .wait_for_idle = vega10_ih_wait_for_idle,
+ .soft_reset = vega10_ih_soft_reset,
+ .set_clockgating_state = vega10_ih_set_clockgating_state,
+ .set_powergating_state = vega10_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs vega10_ih_funcs = {
+ .get_wptr = vega10_ih_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = vega10_ih_set_rptr
+};
+
+static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &vega10_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version vega10_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vega10_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.h b/drivers/gpu/drm/amd/amdgpu/vega10_ih.h
new file mode 100644
index 000000000..82edd28b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VEGA10_IH_H__
+#define __VEGA10_IH_H__
+
+extern const struct amd_ip_funcs vega10_ih_ip_funcs;
+extern const struct amdgpu_ip_block_version vega10_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
new file mode 100644
index 000000000..6b52a539d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "vega10_ip_offset.h"
+
+int vega10_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocke beend by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
+ adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0;
+}
+
+void vega10_doorbell_index_init(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL64_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL64_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL64_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL64_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL64_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL64_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_DOORBELL64_IH;
+ adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_DOORBELL64_UVD_RING0_1;
+ adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_DOORBELL64_UVD_RING2_3;
+ adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_DOORBELL64_UVD_RING4_5;
+ adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_DOORBELL64_UVD_RING6_7;
+ adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_DOORBELL64_VCE_RING0_1;
+ adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;
+ adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;
+ adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
+
+ /* In unit of dword doorbell */
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 4;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h
new file mode 100644
index 000000000..8de4ccce5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h
@@ -0,0 +1,3335 @@
+/*
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VEGA10_SDMA_PKT_OPEN_H_
+#define __VEGA10_SDMA_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_DUMMY_TRAP 16
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+/*define for op field*/
+#define SDMA_PKT_HEADER_op_offset 0
+#define SDMA_PKT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_op_shift 0
+#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_HEADER_sub_op_offset 0
+#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_sub_op_shift 8
+#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for all field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_HEADER_mip_max_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_HEADER_mip_max_shift 20
+#define SDMA_PKT_COPY_TILED_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_mip_max_mask) << SDMA_PKT_COPY_TILED_HEADER_mip_max_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for epitch field*/
+#define SDMA_PKT_COPY_TILED_DW_5_epitch_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_epitch_mask 0x0000FFFF
+#define SDMA_PKT_COPY_TILED_DW_5_epitch_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_EPITCH(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_epitch_mask) << SDMA_PKT_COPY_TILED_DW_5_epitch_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_shift 20
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for epitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_mask 0x0000FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_EPITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x000007FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_T2T_HEADER_mip_max_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_HEADER_mip_max_shift 20
+#define SDMA_PKT_COPY_T2T_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_mip_max_mask) << SDMA_PKT_COPY_T2T_HEADER_mip_max_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_epitch field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_mask 0x0000FFFF
+#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_EPITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_epitch_mask) << SDMA_PKT_COPY_T2T_DW_6_src_epitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_epitch field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_mask 0x0000FFFF
+#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_EPITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_epitch_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_epitch_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_shift)
+
+/*define for mip_id field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for epitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_mask 0x0000FFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_EPITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_shift 20
+#define SDMA_PKT_WRITE_TILED_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_mip_max_mask) << SDMA_PKT_WRITE_TILED_HEADER_mip_max_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for epitch field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_epitch_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_epitch_mask 0x0000FFFF
+#define SDMA_PKT_WRITE_TILED_DW_5_epitch_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_EPITCH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_epitch_mask) << SDMA_PKT_WRITE_TILED_DW_5_epitch_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word*/
+/*define for mask_first_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word*/
+/*define for mask_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word*/
+/*define for mask_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word*/
+/*define for value_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word*/
+/*define for value_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/*define for memlog_clr field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/*define for BYTE_STRIDE word*/
+/*define for byte_stride field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/*define for DMA_COUNT word*/
+/*define for dma_count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for BYTE_COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR word*/
+/*define for addr_31_2 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for ea field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/*define for START_PAGE word*/
+/*define for addr_31_4 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/*define for PAGE_NUM word*/
+/*define for page_num_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/*define for mode field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/*define for PATTERN word*/
+/*define for pattern field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/*define for CMP0_ADDR_START_LO word*/
+/*define for cmp0_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/*define for CMP0_ADDR_START_HI word*/
+/*define for cmp0_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/*define for CMP0_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP0_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for CMP1_ADDR_START_LO word*/
+/*define for cmp1_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/*define for CMP1_ADDR_START_HI word*/
+/*define for cmp1_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/*define for CMP1_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP1_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for REC_ADDR_LO word*/
+/*define for rec_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/*define for REC_ADDR_HI word*/
+/*define for rec_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/*define for RESERVED word*/
+/*define for reserved field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
new file mode 100644
index 000000000..dbc995364
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "soc15.h"
+
+#include "oss/osssys_4_2_0_offset.h"
+#include "oss/osssys_4_2_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "vega20_ih.h"
+
+#define MAX_REARM_RETRY 10
+
+#define mmIH_CHICKEN_ALDEBARAN 0x18d
+#define mmIH_CHICKEN_ALDEBARAN_BASE_IDX 0
+
+#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN 0x00ea
+#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN_BASE_IDX 0
+#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE__SHIFT 0x10
+#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE_MASK 0x00010000L
+
+static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * vega20_ih_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (VEGA20).
+ */
+static void vega20_ih_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+
+ if (adev->irq.ih2.ring_size) {
+ ih_regs = &adev->irq.ih2.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2;
+ }
+}
+
+/**
+ * vega20_ih_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (VEGA20)
+ */
+static int vega20_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+ if (amdgpu_sriov_vf(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * vega20_ih_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (VEGA20).
+ */
+static int vega20_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = vega20_ih_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t vega20_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 1 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t vega20_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * vega20_ih_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (VEGA20)
+ */
+static int vega20_ih_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = vega20_ih_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, vega20_ih_doorbell_rptr(ih));
+
+ return 0;
+}
+
+static uint32_t vega20_setup_retry_doorbell(u32 doorbell_index)
+{
+ u32 val = 0;
+
+ val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index);
+ val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
+
+ return val;
+}
+
+/**
+ * vega20_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int vega20_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
+ u32 ih_chicken;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = vega20_ih_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ }
+
+ /* psp firmware won't program IH_CHICKEN for aldebaran
+ * driver needs to program it properly according to
+ * MC_SPACE type in IH_RB_CNTL */
+ if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) ||
+ (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = vega20_ih_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (!amdgpu_sriov_vf(adev))
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->irq.ih.doorbell_index);
+
+ pci_set_master(adev->pdev);
+
+ /* Allocate the doorbell for IH Retry CAM */
+ adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 3) << 1;
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RETRY_CAM,
+ vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
+
+ /* Enable IH Retry CAM */
+ if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0) ||
+ adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))
+ WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
+ ENABLE, 1);
+ else
+ WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL, ENABLE, 1);
+
+ adev->irq.retry_cam_enabled = true;
+
+ /* enable interrupts */
+ ret = vega20_ih_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * vega20_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VEGA20).
+ */
+static void vega20_ih_irq_disable(struct amdgpu_device *adev)
+{
+ vega20_ih_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * vega20_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VEGA20). Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
+ /* Only ring0 supports writeback. On other rings fall back
+ * to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
+ */
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ }
+
+ ih_regs = &ih->ih_regs;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catchup.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * vega20_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ */
+static void vega20_ih_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * vega20_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void vega20_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih == &adev->irq.ih_soft)
+ return;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ vega20_ih_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * vega20_ih_self_irq - dispatch work for ring 1 and 2
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int vega20_ih_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ switch (entry->ring_id) {
+ case 1:
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ case 2:
+ schedule_work(&adev->irq.ih2_work);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vega20_ih_self_irq_funcs = {
+ .process = vega20_ih_self_irq,
+};
+
+static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs;
+}
+
+static int vega20_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega20_ih_set_interrupt_funcs(adev);
+ vega20_ih_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int vega20_ih_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr = true;
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+ if (r)
+ return r;
+
+ if ((adev->flags & AMD_IS_APU) &&
+ (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
+ use_bus_addr = false;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+
+ if (adev->ip_versions[OSSSYS_HWIP][0] != IP_VERSION(4, 4, 2)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ adev->irq.ih2.use_doorbell = true;
+ adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+ }
+
+ /* initialize ih control registers offset */
+ vega20_ih_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, use_bus_addr);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int vega20_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int vega20_ih_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vega20_ih_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int vega20_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega20_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int vega20_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vega20_ih_hw_fini(adev);
+}
+
+static int vega20_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vega20_ih_hw_init(adev);
+}
+
+static bool vega20_ih_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int vega20_ih_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int vega20_ih_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+}
+
+static int vega20_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega20_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+
+}
+
+static int vega20_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs vega20_ih_ip_funcs = {
+ .name = "vega20_ih",
+ .early_init = vega20_ih_early_init,
+ .late_init = NULL,
+ .sw_init = vega20_ih_sw_init,
+ .sw_fini = vega20_ih_sw_fini,
+ .hw_init = vega20_ih_hw_init,
+ .hw_fini = vega20_ih_hw_fini,
+ .suspend = vega20_ih_suspend,
+ .resume = vega20_ih_resume,
+ .is_idle = vega20_ih_is_idle,
+ .wait_for_idle = vega20_ih_wait_for_idle,
+ .soft_reset = vega20_ih_soft_reset,
+ .set_clockgating_state = vega20_ih_set_clockgating_state,
+ .set_powergating_state = vega20_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs vega20_ih_funcs = {
+ .get_wptr = vega20_ih_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = vega20_ih_set_rptr
+};
+
+static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &vega20_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version vega20_ih_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 4,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &vega20_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.h b/drivers/gpu/drm/amd/amdgpu/vega20_ih.h
new file mode 100644
index 000000000..7af6d8758
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VEGA20_IH_H__
+#define __VEGA20_IH_H__
+
+extern const struct amd_ip_funcs vega20_ih_ip_funcs;
+extern const struct amdgpu_ip_block_version vega20_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
new file mode 100644
index 000000000..556f854e3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "vega20_ip_offset.h"
+
+int vega20_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocke beend by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
+ }
+ return 0;
+}
+
+void vega20_doorbell_index_init(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_VEGA20_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_VEGA20_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_VEGA20_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_VEGA20_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_VEGA20_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_VEGA20_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.sdma_engine[2] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2;
+ adev->doorbell_index.sdma_engine[3] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3;
+ adev->doorbell_index.sdma_engine[4] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4;
+ adev->doorbell_index.sdma_engine[5] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5;
+ adev->doorbell_index.sdma_engine[6] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6;
+ adev->doorbell_index.sdma_engine[7] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7;
+ adev->doorbell_index.ih = AMDGPU_VEGA20_DOORBELL_IH;
+ adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1;
+ adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3;
+ adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5;
+ adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7;
+ adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1;
+ adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
+ adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
+ adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
new file mode 100644
index 000000000..fe8ba9e98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -0,0 +1,2221 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_sh_mask.h"
+
+#include "smu/smu_7_1_1_d.h"
+#include "smu/smu_7_1_1_sh_mask.h"
+
+#include "uvd/uvd_5_0_d.h"
+#include "uvd/uvd_5_0_sh_mask.h"
+
+#include "vce/vce_3_0_d.h"
+#include "vce/vce_3_0_sh_mask.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
+#include "vid.h"
+#include "vi.h"
+#include "gmc_v8_0.h"
+#include "gmc_v7_0.h"
+#include "gfx_v8_0.h"
+#include "sdma_v2_4.h"
+#include "sdma_v3_0.h"
+#include "dce_v10_0.h"
+#include "dce_v11_0.h"
+#include "iceland_ih.h"
+#include "tonga_ih.h"
+#include "cz_ih.h"
+#include "uvd_v5_0.h"
+#include "uvd_v6_0.h"
+#include "vce_v3_0.h"
+#if defined(CONFIG_DRM_AMD_ACP)
+#include "amdgpu_acp.h"
+#endif
+#include "amdgpu_vkms.h"
+#include "mxgpu_vi.h"
+#include "amdgpu_dm.h"
+
+#define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
+#define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
+#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
+#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_1_OVERRIDE_MASK 0x00000004L
+#define PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_2_OVERRIDE_MASK 0x00000008L
+#define PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_1_OVERRIDE_MASK 0x00000010L
+#define ixPCIE_L1_PM_SUB_CNTL 0x378
+#define PCIE_L1_PM_SUB_CNTL__ASPM_L1_2_EN_MASK 0x00000004L
+#define PCIE_L1_PM_SUB_CNTL__ASPM_L1_1_EN_MASK 0x00000008L
+#define PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_2_EN_MASK 0x00000001L
+#define PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_1_EN_MASK 0x00000002L
+#define PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK 0x00200000L
+#define LINK_CAP 0x64
+#define PCIE_LINK_CAP__CLOCK_POWER_MANAGEMENT_MASK 0x00040000L
+#define ixCPM_CONTROL 0x1400118
+#define ixPCIE_LC_CNTL7 0x100100BC
+#define PCIE_LC_CNTL7__LC_L1_SIDEBAND_CLKREQ_PDWN_EN_MASK 0x00000400L
+#define PCIE_LC_CNTL__LC_L0S_INACTIVITY_DEFAULT 0x00000007
+#define PCIE_LC_CNTL__LC_L1_INACTIVITY_DEFAULT 0x00000009
+#define CPM_CONTROL__CLKREQb_UNGATE_TXCLK_ENABLE_MASK 0x01000000L
+#define PCIE_L1_PM_SUB_CNTL 0x378
+#define ASIC_IS_P22(asic_type, rid) ((asic_type >= CHIP_POLARIS10) && \
+ (asic_type <= CHIP_POLARIS12) && \
+ (rid >= 0x6E))
+/* Topaz */
+static const struct amdgpu_video_codecs topaz_video_codecs_encode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+/* Tonga, CZ, ST, Fiji */
+static const struct amdgpu_video_codec_info tonga_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 4096,
+ .max_height = 2304,
+ .max_pixels_per_frame = 4096 * 2304,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs tonga_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(tonga_video_codecs_encode_array),
+ .codec_array = tonga_video_codecs_encode_array,
+};
+
+/* Polaris */
+static const struct amdgpu_video_codec_info polaris_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 4096,
+ .max_height = 2304,
+ .max_pixels_per_frame = 4096 * 2304,
+ .max_level = 0,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
+ .max_width = 4096,
+ .max_height = 2304,
+ .max_pixels_per_frame = 4096 * 2304,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs polaris_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(polaris_video_codecs_encode_array),
+ .codec_array = polaris_video_codecs_encode_array,
+};
+
+/* Topaz */
+static const struct amdgpu_video_codecs topaz_video_codecs_decode =
+{
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+/* Tonga */
+static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 52,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs tonga_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(tonga_video_codecs_decode_array),
+ .codec_array = tonga_video_codecs_decode_array,
+};
+
+/* CZ, ST, Fiji, Polaris */
+static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 52,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 4,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 186,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
+ .max_width = 4096,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs cz_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(cz_video_codecs_decode_array),
+ .codec_array = cz_video_codecs_decode_array,
+};
+
+static int vi_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ if (encode)
+ *codecs = &topaz_video_codecs_encode;
+ else
+ *codecs = &topaz_video_codecs_decode;
+ return 0;
+ case CHIP_TONGA:
+ if (encode)
+ *codecs = &tonga_video_codecs_encode;
+ else
+ *codecs = &tonga_video_codecs_decode;
+ return 0;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ if (encode)
+ *codecs = &polaris_video_codecs_encode;
+ else
+ *codecs = &cz_video_codecs_decode;
+ return 0;
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (encode)
+ *codecs = &tonga_video_codecs_encode;
+ else
+ *codecs = &cz_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Indirect registers accessor
+ */
+static u32 vi_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32_NO_KIQ(mmPCIE_INDEX, reg);
+ (void)RREG32_NO_KIQ(mmPCIE_INDEX);
+ r = RREG32_NO_KIQ(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void vi_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32_NO_KIQ(mmPCIE_INDEX, reg);
+ (void)RREG32_NO_KIQ(mmPCIE_INDEX);
+ WREG32_NO_KIQ(mmPCIE_DATA, v);
+ (void)RREG32_NO_KIQ(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+ r = RREG32_NO_KIQ(mmSMC_IND_DATA_11);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+ WREG32_NO_KIQ(mmSMC_IND_DATA_11, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
+/* smu_8_0_d.h */
+#define mmMP0PUB_IND_INDEX 0x180
+#define mmMP0PUB_IND_DATA 0x181
+
+static u32 cz_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmMP0PUB_IND_INDEX, (reg));
+ r = RREG32(mmMP0PUB_IND_DATA);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void cz_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmMP0PUB_IND_INDEX, (reg));
+ WREG32(mmMP0PUB_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
+static u32 vi_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(mmUVD_CTX_DATA);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void vi_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(mmUVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
+static u32 vi_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ r = RREG32(mmDIDT_IND_DATA);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ WREG32(mmDIDT_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ WREG32(mmGC_CAC_IND_INDEX, (reg));
+ r = RREG32(mmGC_CAC_IND_DATA);
+ spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+ return r;
+}
+
+static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+ WREG32(mmGC_CAC_IND_INDEX, (reg));
+ WREG32(mmGC_CAC_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+}
+
+
+static const u32 tonga_mgcg_cgcg_init[] =
+{
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
+ mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmSMC_IND_INDEX_4, 0xffffffff, 0xC060000C,
+ mmSMC_IND_DATA_4, 0xc0000fff, 0x00000100,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] =
+{
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
+ mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmSMC_IND_INDEX_4, 0xffffffff, 0xC060000C,
+ mmSMC_IND_DATA_4, 0xc0000fff, 0x00000100,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+};
+
+static const u32 iceland_mgcg_cgcg_init[] =
+{
+ mmPCIE_INDEX, 0xffffffff, ixPCIE_CNTL2,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmSMC_IND_INDEX_4, 0xffffffff, ixCGTT_ROM_CLK_CTRL0,
+ mmSMC_IND_DATA_4, 0xc0000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+};
+
+static const u32 cz_mgcg_cgcg_init[] =
+{
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
+ mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+ mmPCIE_DATA, 0x000f0000, 0x00000000,
+ mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
+};
+
+static const u32 stoney_mgcg_cgcg_init[] =
+{
+ mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00000100,
+ mmHDP_XDP_CGTT_BLK_CTRL, 0xffffffff, 0x00000104,
+ mmHDP_HOST_PATH_CNTL, 0xffffffff, 0x0f000027,
+};
+
+static void vi_init_golden_registers(struct amdgpu_device *adev)
+{
+ /* Some of the registers might be dependent on GRBM_GFX_INDEX */
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_vi_init_golden_registers(adev);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ amdgpu_device_program_register_sequence(adev,
+ iceland_mgcg_cgcg_init,
+ ARRAY_SIZE(iceland_mgcg_cgcg_init));
+ break;
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ break;
+ case CHIP_CARRIZO:
+ amdgpu_device_program_register_sequence(adev,
+ cz_mgcg_cgcg_init,
+ ARRAY_SIZE(cz_mgcg_cgcg_init));
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_program_register_sequence(adev,
+ stoney_mgcg_cgcg_init,
+ ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ default:
+ break;
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+/**
+ * vi_get_xclk - get the xclk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the reference clock used by the gfx engine
+ * (VI).
+ */
+static u32 vi_get_xclk(struct amdgpu_device *adev)
+{
+ u32 reference_clock = adev->clock.spll.reference_freq;
+ u32 tmp;
+
+ if (adev->flags & AMD_IS_APU) {
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ /* vbios says 48Mhz, but the actual freq is 100Mhz */
+ return 10000;
+ default:
+ return reference_clock;
+ }
+ }
+
+ tmp = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
+ if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK))
+ return 1000;
+
+ tmp = RREG32_SMC(ixCG_CLKPIN_CNTL);
+ if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL, XTALIN_DIVIDE))
+ return reference_clock / 4;
+
+ return reference_clock;
+}
+
+/**
+ * vi_srbm_select - select specific register instances
+ *
+ * @adev: amdgpu_device pointer
+ * @me: selected ME (micro engine)
+ * @pipe: pipe
+ * @queue: queue
+ * @vmid: VMID
+ *
+ * Switches the currently active registers instances. Some
+ * registers are instanced per VMID, others are instanced per
+ * me/pipe/queue combination.
+ */
+void vi_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 srbm_gfx_cntl = 0;
+ srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, PIPEID, pipe);
+ srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, MEID, me);
+ srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, VMID, vmid);
+ srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, QUEUEID, queue);
+ WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl);
+}
+
+static bool vi_read_disabled_bios(struct amdgpu_device *adev)
+{
+ u32 bus_cntl;
+ u32 d1vga_control = 0;
+ u32 d2vga_control = 0;
+ u32 vga_render_control = 0;
+ u32 rom_cntl;
+ bool r;
+
+ bus_cntl = RREG32(mmBUS_CNTL);
+ if (adev->mode_info.num_crtc) {
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
+ }
+ rom_cntl = RREG32_SMC(ixROM_CNTL);
+
+ /* enable the rom */
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
+ if (adev->mode_info.num_crtc) {
+ /* Disable VGA mode */
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D2VGA_CONTROL__D2VGA_MODE_ENABLE_MASK |
+ D2VGA_CONTROL__D2VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl | ROM_CNTL__SCK_OVERWRITE_MASK);
+
+ r = amdgpu_read_bios(adev);
+
+ /* restore regs */
+ WREG32(mmBUS_CNTL, bus_cntl);
+ if (adev->mode_info.num_crtc) {
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl);
+ return r;
+}
+
+static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ unsigned long flags;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+ /* take the smc lock since we are using the smc index */
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ /* set rom index to 0 */
+ WREG32(mmSMC_IND_INDEX_11, ixROM_INDEX);
+ WREG32(mmSMC_IND_DATA_11, 0);
+ /* set index to data for continous read */
+ WREG32(mmSMC_IND_INDEX_11, ixROM_DATA);
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(mmSMC_IND_DATA_11);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+
+ return true;
+}
+
+static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = {
+ {mmGRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmGRBM_STATUS_SE2},
+ {mmGRBM_STATUS_SE3},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {mmSRBM_STATUS3},
+ {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+ {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {mmCP_CPF_BUSY_STAT},
+ {mmCP_CPF_STALLED_STAT1},
+ {mmCP_CPF_STATUS},
+ {mmCP_CPC_BUSY_STAT},
+ {mmCP_CPC_STALLED_STAT1},
+ {mmCP_CPC_STATUS},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmGB_MACROTILE_MODE0},
+ {mmGB_MACROTILE_MODE1},
+ {mmGB_MACROTILE_MODE2},
+ {mmGB_MACROTILE_MODE3},
+ {mmGB_MACROTILE_MODE4},
+ {mmGB_MACROTILE_MODE5},
+ {mmGB_MACROTILE_MODE6},
+ {mmGB_MACROTILE_MODE7},
+ {mmGB_MACROTILE_MODE8},
+ {mmGB_MACROTILE_MODE9},
+ {mmGB_MACROTILE_MODE10},
+ {mmGB_MACROTILE_MODE11},
+ {mmGB_MACROTILE_MODE12},
+ {mmGB_MACROTILE_MODE13},
+ {mmGB_MACROTILE_MODE14},
+ {mmGB_MACROTILE_MODE15},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmGB_BACKEND_MAP, false},
+ {mmPA_SC_RASTER_CONFIG, true},
+ {mmPA_SC_RASTER_CONFIG_1, true},
+};
+
+static uint32_t vi_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ uint32_t val;
+ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
+ unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
+
+ switch (reg_offset) {
+ case mmCC_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
+ case mmGC_USER_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
+ case mmPA_SC_RASTER_CONFIG:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
+ case mmPA_SC_RASTER_CONFIG_1:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+ } else {
+ unsigned idx;
+
+ switch (reg_offset) {
+ case mmGB_ADDR_CONFIG:
+ return adev->gfx.config.gb_addr_config;
+ case mmMC_ARB_RAMCFG:
+ return adev->gfx.config.mc_arb_ramcfg;
+ case mmGB_TILE_MODE0:
+ case mmGB_TILE_MODE1:
+ case mmGB_TILE_MODE2:
+ case mmGB_TILE_MODE3:
+ case mmGB_TILE_MODE4:
+ case mmGB_TILE_MODE5:
+ case mmGB_TILE_MODE6:
+ case mmGB_TILE_MODE7:
+ case mmGB_TILE_MODE8:
+ case mmGB_TILE_MODE9:
+ case mmGB_TILE_MODE10:
+ case mmGB_TILE_MODE11:
+ case mmGB_TILE_MODE12:
+ case mmGB_TILE_MODE13:
+ case mmGB_TILE_MODE14:
+ case mmGB_TILE_MODE15:
+ case mmGB_TILE_MODE16:
+ case mmGB_TILE_MODE17:
+ case mmGB_TILE_MODE18:
+ case mmGB_TILE_MODE19:
+ case mmGB_TILE_MODE20:
+ case mmGB_TILE_MODE21:
+ case mmGB_TILE_MODE22:
+ case mmGB_TILE_MODE23:
+ case mmGB_TILE_MODE24:
+ case mmGB_TILE_MODE25:
+ case mmGB_TILE_MODE26:
+ case mmGB_TILE_MODE27:
+ case mmGB_TILE_MODE28:
+ case mmGB_TILE_MODE29:
+ case mmGB_TILE_MODE30:
+ case mmGB_TILE_MODE31:
+ idx = (reg_offset - mmGB_TILE_MODE0);
+ return adev->gfx.config.tile_mode_array[idx];
+ case mmGB_MACROTILE_MODE0:
+ case mmGB_MACROTILE_MODE1:
+ case mmGB_MACROTILE_MODE2:
+ case mmGB_MACROTILE_MODE3:
+ case mmGB_MACROTILE_MODE4:
+ case mmGB_MACROTILE_MODE5:
+ case mmGB_MACROTILE_MODE6:
+ case mmGB_MACROTILE_MODE7:
+ case mmGB_MACROTILE_MODE8:
+ case mmGB_MACROTILE_MODE9:
+ case mmGB_MACROTILE_MODE10:
+ case mmGB_MACROTILE_MODE11:
+ case mmGB_MACROTILE_MODE12:
+ case mmGB_MACROTILE_MODE13:
+ case mmGB_MACROTILE_MODE14:
+ case mmGB_MACROTILE_MODE15:
+ idx = (reg_offset - mmGB_MACROTILE_MODE0);
+ return adev->gfx.config.macrotile_mode_array[idx];
+ default:
+ return RREG32(reg_offset);
+ }
+ }
+}
+
+static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) {
+ bool indexed = vi_allowed_read_registers[i].grbm_indexed;
+
+ if (reg_offset != vi_allowed_read_registers[i].reg_offset)
+ continue;
+
+ *value = vi_get_register_value(adev, indexed, se_num, sh_num,
+ reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/**
+ * vi_asic_pci_config_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use PCI Config method to reset the GPU.
+ *
+ * Returns 0 for success.
+ */
+static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int r = -EINVAL;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ /* reset */
+ amdgpu_device_pci_config_reset(adev);
+
+ udelay(100);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+ /* enable BM */
+ pci_set_master(adev->pdev);
+ adev->has_hw_reset = true;
+ r = 0;
+ break;
+ }
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return r;
+}
+
+static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
+static enum amd_reset_method
+vi_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset;
+
+ if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * vi_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "BACO reset\n");
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ dev_info(adev->dev, "PCI CONFIG reset\n");
+ r = vi_asic_pci_config_reset(adev);
+ }
+
+ return r;
+}
+
+static u32 vi_get_config_memsize(struct amdgpu_device *adev)
+{
+ return RREG32(mmCONFIG_MEMSIZE);
+}
+
+static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
+ u32 cntl_reg, u32 status_reg)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ uint32_t tmp;
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ clock, false, &dividers);
+ if (r)
+ return r;
+
+ tmp = RREG32_SMC(cntl_reg);
+
+ if (adev->flags & AMD_IS_APU)
+ tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
+ else
+ tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+ CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+ tmp |= dividers.post_divider;
+ WREG32_SMC(cntl_reg, tmp);
+
+ for (i = 0; i < 100; i++) {
+ tmp = RREG32_SMC(status_reg);
+ if (adev->flags & AMD_IS_APU) {
+ if (tmp & 0x10000)
+ break;
+ } else {
+ if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+ break;
+ }
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+ return 0;
+}
+
+#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
+#define ixGNB_CLK1_STATUS 0xD822010C
+#define ixGNB_CLK2_DFS_CNTL 0xD8220110
+#define ixGNB_CLK2_STATUS 0xD822012C
+#define ixGNB_CLK3_DFS_CNTL 0xD8220130
+#define ixGNB_CLK3_STATUS 0xD822014C
+
+static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ int r;
+
+ if (adev->flags & AMD_IS_APU) {
+ r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
+ if (r)
+ return r;
+
+ r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
+ if (r)
+ return r;
+ } else {
+ r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ u32 tmp;
+ u32 reg_ctrl;
+ u32 reg_status;
+ u32 status_mask;
+ u32 reg_mask;
+
+ if (adev->flags & AMD_IS_APU) {
+ reg_ctrl = ixGNB_CLK3_DFS_CNTL;
+ reg_status = ixGNB_CLK3_STATUS;
+ status_mask = 0x00010000;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ } else {
+ reg_ctrl = ixCG_ECLK_CNTL;
+ reg_status = ixCG_ECLK_STATUS;
+ status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ }
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ ecclk, false, &dividers);
+ if (r)
+ return r;
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(reg_status) & status_mask)
+ break;
+ mdelay(10);
+ }
+
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ tmp = RREG32_SMC(reg_ctrl);
+ tmp &= ~reg_mask;
+ tmp |= dividers.post_divider;
+ WREG32_SMC(reg_ctrl, tmp);
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(reg_status) & status_mask)
+ break;
+ mdelay(10);
+ }
+
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static void vi_enable_aspm(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data |= PCIE_LC_CNTL__LC_L0S_INACTIVITY_DEFAULT <<
+ PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ data |= PCIE_LC_CNTL__LC_L1_INACTIVITY_DEFAULT <<
+ PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ data |= PCIE_LC_CNTL__LC_DELAY_L1_EXIT_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+}
+
+static void vi_program_aspm(struct amdgpu_device *adev)
+{
+ u32 data, data1, orig;
+ bool bL1SS = false;
+ bool bClkReqSupport = true;
+
+ if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_pcie_dynamic_switching_supported())
+ return;
+
+ if (adev->flags & AMD_IS_APU ||
+ adev->asic_type < CHIP_POLARIS10)
+ return;
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= 0x0024 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT;
+ data |= PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
+
+ data = RREG32_PCIE(ixPCIE_LC_L1_PM_SUBSTATE);
+ pci_read_config_dword(adev->pdev, PCIE_L1_PM_SUB_CNTL, &data1);
+ if (data & PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK &&
+ (data & (PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK |
+ PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_1_OVERRIDE_MASK |
+ PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_2_OVERRIDE_MASK |
+ PCIE_LC_L1_PM_SUBSTATE__LC_ASPM_L1_1_OVERRIDE_MASK))) {
+ bL1SS = true;
+ } else if (data1 & (PCIE_L1_PM_SUB_CNTL__ASPM_L1_2_EN_MASK |
+ PCIE_L1_PM_SUB_CNTL__ASPM_L1_1_EN_MASK |
+ PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_2_EN_MASK |
+ PCIE_L1_PM_SUB_CNTL__PCI_PM_L1_1_EN_MASK)) {
+ bL1SS = true;
+ }
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL6);
+ data |= PCIE_LC_CNTL6__LC_L1_POWERDOWN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL6, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data |= PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, data);
+
+ pci_read_config_dword(adev->pdev, LINK_CAP, &data);
+ if (!(data & PCIE_LINK_CAP__CLOCK_POWER_MANAGEMENT_MASK))
+ bClkReqSupport = false;
+
+ if (bClkReqSupport) {
+ orig = data = RREG32_SMC(ixTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) |
+ (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixTHM_CLK_CNTL, data);
+
+ orig = data = RREG32_SMC(ixMISC_CLK_CTRL);
+ data &= ~(MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK |
+ MISC_CLK_CTRL__ZCLK_SEL_MASK | MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT) |
+ (1 << MISC_CLK_CTRL__ZCLK_SEL__SHIFT);
+ data |= (0x20 << MISC_CLK_CTRL__DFT_SMS_PG_CLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMISC_CLK_CTRL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL);
+ data |= CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
+ data |= CG_CLKPIN_CNTL_2__ENABLE_XCLK_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32_SMC(ixMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= (4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMPLL_BYPASSCLK_SEL, data);
+
+ orig = data = RREG32_PCIE(ixCPM_CONTROL);
+ data |= (CPM_CONTROL__REFCLK_XSTCLK_ENABLE_MASK |
+ CPM_CONTROL__CLKREQb_UNGATE_TXCLK_ENABLE_MASK);
+ if (orig != data)
+ WREG32_PCIE(ixCPM_CONTROL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_CONFIG_CNTL);
+ data &= ~PCIE_CONFIG_CNTL__DYN_CLK_LATENCY_MASK;
+ data |= (0xE << PCIE_CONFIG_CNTL__DYN_CLK_LATENCY__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_CONFIG_CNTL, data);
+
+ orig = data = RREG32(mmBIF_CLK_CTRL);
+ data |= BIF_CLK_CTRL__BIF_XSTCLK_READY_MASK;
+ if (orig != data)
+ WREG32(mmBIF_CLK_CTRL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_L1_SIDEBAND_CLKREQ_PDWN_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL7, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_HW_DEBUG);
+ data |= PCIE_HW_DEBUG__HW_01_DEBUG_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_HW_DEBUG, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK;
+ if (bL1SS)
+ data &= ~PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL2, data);
+
+ }
+
+ vi_enable_aspm(adev);
+
+ data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ data1 = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if (((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) &&
+ data1 & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK &&
+ data1 & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+ }
+
+ if ((adev->asic_type == CHIP_POLARIS12 &&
+ !(ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))) ||
+ ASIC_IS_P22(adev->asic_type, adev->external_rev_id)) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_TRAINING_CNTL);
+ data &= ~PCIE_LC_TRAINING_CNTL__LC_DISABLE_TRAINING_BIT_ARCH_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_TRAINING_CNTL, data);
+ }
+}
+
+static void vi_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* not necessary on CZ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ tmp = RREG32(mmBIF_DOORBELL_APER_EN);
+ if (enable)
+ tmp = REG_SET_FIELD(tmp, BIF_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, BIF_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0);
+
+ WREG32(mmBIF_DOORBELL_APER_EN, tmp);
+}
+
+#define ATI_REV_ID_FUSE_MACRO__ADDRESS 0xC0014044
+#define ATI_REV_ID_FUSE_MACRO__SHIFT 9
+#define ATI_REV_ID_FUSE_MACRO__MASK 0x00001E00
+
+static uint32_t vi_get_rev_id(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return (RREG32_SMC(ATI_REV_ID_FUSE_MACRO__ADDRESS) & ATI_REV_ID_FUSE_MACRO__MASK)
+ >> ATI_REV_ID_FUSE_MACRO__SHIFT;
+ else
+ return (RREG32(mmPCIE_EFUSE4) & PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID_MASK)
+ >> PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID__SHIFT;
+}
+
+static void vi_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void vi_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
+static bool vi_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* CZ has hang issues with full reset at the moment */
+ return false;
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ /* XXX: soft reset should work on fiji and tonga */
+ return true;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ default:
+ /* change this when we support soft reset */
+ return true;
+ }
+}
+
+static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
+static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
+static bool vi_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 clock_cntl, pc;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* check if the SMC is already running */
+ clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
+ pc = RREG32_SMC(ixSMC_PC_C);
+ if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) &&
+ (0x20100 <= pc))
+ return true;
+
+ return false;
+}
+
+static void vi_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static const struct amdgpu_asic_funcs vi_asic_funcs =
+{
+ .read_disabled_bios = &vi_read_disabled_bios,
+ .read_bios_from_rom = &vi_read_bios_from_rom,
+ .read_register = &vi_read_register,
+ .reset = &vi_asic_reset,
+ .reset_method = &vi_asic_reset_method,
+ .get_xclk = &vi_get_xclk,
+ .set_uvd_clocks = &vi_set_uvd_clocks,
+ .set_vce_clocks = &vi_set_vce_clocks,
+ .get_config_memsize = &vi_get_config_memsize,
+ .flush_hdp = &vi_flush_hdp,
+ .invalidate_hdp = &vi_invalidate_hdp,
+ .need_full_reset = &vi_need_full_reset,
+ .init_doorbell_index = &legacy_doorbell_index_init,
+ .get_pcie_usage = &vi_get_pcie_usage,
+ .need_reset_on_init = &vi_need_reset_on_init,
+ .get_pcie_replay_count = &vi_get_pcie_replay_count,
+ .supports_baco = &vi_asic_supports_baco,
+ .pre_asic_init = &vi_pre_asic_init,
+ .query_video_codecs = &vi_query_video_codecs,
+};
+
+#define CZ_REV_BRISTOL(rev) \
+ ((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6))
+
+static int vi_common_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->flags & AMD_IS_APU) {
+ adev->smc_rreg = &cz_smc_rreg;
+ adev->smc_wreg = &cz_smc_wreg;
+ } else {
+ adev->smc_rreg = &vi_smc_rreg;
+ adev->smc_wreg = &vi_smc_wreg;
+ }
+ adev->pcie_rreg = &vi_pcie_rreg;
+ adev->pcie_wreg = &vi_pcie_wreg;
+ adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg;
+ adev->didt_rreg = &vi_didt_rreg;
+ adev->didt_wreg = &vi_didt_wreg;
+ adev->gc_cac_rreg = &vi_gc_cac_rreg;
+ adev->gc_cac_wreg = &vi_gc_cac_wreg;
+
+ adev->asic_funcs = &vi_asic_funcs;
+
+ adev->rev_id = vi_get_rev_id(adev);
+ adev->external_rev_id = 0xFF;
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 0x1;
+ break;
+ case CHIP_FIJI:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x3c;
+ break;
+ case CHIP_TONGA:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
+ case CHIP_POLARIS11:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x5A;
+ break;
+ case CHIP_POLARIS10:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ case CHIP_POLARIS12:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x64;
+ break;
+ case CHIP_VEGAM:
+ adev->cg_flags = 0;
+ /*AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;*/
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x6E;
+ break;
+ case CHIP_CARRIZO:
+ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCE_MGCG;
+ /* rev0 hardware requires workarounds to support PG */
+ adev->pg_flags = 0;
+ if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_PIPELINE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_UVD |
+ AMD_PG_SUPPORT_VCE;
+ }
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ case CHIP_STONEY:
+ adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCE_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_PIPELINE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_UVD |
+ AMD_PG_SUPPORT_VCE;
+ adev->external_rev_id = adev->rev_id + 0x61;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_vi_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int vi_common_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_vi_mailbox_get_irq(adev);
+
+ return 0;
+}
+
+static int vi_common_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_vi_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int vi_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int vi_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* move the golden regs per IP block */
+ vi_init_golden_registers(adev);
+ /* enable aspm */
+ vi_program_aspm(adev);
+ /* enable the doorbell aperture */
+ vi_enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int vi_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* enable the doorbell aperture */
+ vi_enable_doorbell_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_vi_mailbox_put_irq(adev);
+
+ return 0;
+}
+
+static int vi_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vi_common_hw_fini(adev);
+}
+
+static int vi_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return vi_common_hw_init(adev);
+}
+
+static bool vi_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int vi_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int vi_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ temp = data = RREG32_PCIE(ixPCIE_CNTL2);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
+ else
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+
+ if (temp != data)
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+}
+
+static void vi_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ temp = data = RREG32(mmHDP_HOST_PATH_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ data &= ~HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK;
+ else
+ data |= HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK;
+
+ if (temp != data)
+ WREG32(mmHDP_HOST_PATH_CNTL, data);
+}
+
+static void vi_update_hdp_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ temp = data = RREG32(mmHDP_MEM_POWER_LS);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+ else
+ data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+
+ if (temp != data)
+ WREG32(mmHDP_MEM_POWER_LS, data);
+}
+
+static void vi_update_drm_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ temp = data = RREG32(0x157a);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DRM_LS))
+ data |= 1;
+ else
+ data &= ~1;
+
+ if (temp != data)
+ WREG32(0x157a, data);
+}
+
+
+static void vi_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t temp, data;
+
+ temp = data = RREG32_SMC(ixCGTT_ROM_CLK_CTRL0);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
+ else
+ data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
+ CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK;
+
+ if (temp != data)
+ WREG32_SMC(ixCGTT_ROM_CLK_CTRL0, data);
+}
+
+static int vi_common_set_clockgating_state_by_smu(void *handle,
+ enum amd_clockgating_state state)
+{
+ uint32_t msg_id, pp_state = 0;
+ uint32_t pp_support_state = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_MC_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_MC,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_SDMA_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_SDMA,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ pp_support_state = PP_STATE_SUPPORT_LS;
+ pp_state = PP_STATE_LS;
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG) {
+ pp_support_state |= PP_STATE_SUPPORT_CG;
+ pp_state |= PP_STATE_CG;
+ }
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_HDP,
+ pp_support_state,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS) {
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_BIF,
+ PP_STATE_SUPPORT_LS,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+ if (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG) {
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_CG;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_BIF,
+ PP_STATE_SUPPORT_CG,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_DRM_LS) {
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_LS;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_DRM,
+ PP_STATE_SUPPORT_LS,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG) {
+
+ if (state == AMD_CG_STATE_UNGATE)
+ pp_state = 0;
+ else
+ pp_state = PP_STATE_CG;
+
+ msg_id = PP_CG_MSG_ID(PP_GROUP_SYS,
+ PP_BLOCK_SYS_ROM,
+ PP_STATE_SUPPORT_CG,
+ pp_state);
+ amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
+ }
+ return 0;
+}
+
+static int vi_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ vi_update_bif_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_hdp_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_hdp_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_rom_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ vi_update_bif_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_hdp_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_hdp_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ vi_update_drm_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ vi_common_set_clockgating_state_by_smu(adev, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int vi_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void vi_common_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(ixPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+
+ /* AMD_CG_SUPPORT_HDP_LS */
+ data = RREG32(mmHDP_MEM_POWER_LS);
+ if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ data = RREG32(mmHDP_HOST_PATH_CNTL);
+ if (!(data & HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_ROM_MGCG */
+ data = RREG32_SMC(ixCGTT_ROM_CLK_CTRL0);
+ if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
+ *flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+static const struct amd_ip_funcs vi_common_ip_funcs = {
+ .name = "vi_common",
+ .early_init = vi_common_early_init,
+ .late_init = vi_common_late_init,
+ .sw_init = vi_common_sw_init,
+ .sw_fini = vi_common_sw_fini,
+ .hw_init = vi_common_hw_init,
+ .hw_fini = vi_common_hw_fini,
+ .suspend = vi_common_suspend,
+ .resume = vi_common_resume,
+ .is_idle = vi_common_is_idle,
+ .wait_for_idle = vi_common_wait_for_idle,
+ .soft_reset = vi_common_soft_reset,
+ .set_clockgating_state = vi_common_set_clockgating_state,
+ .set_powergating_state = vi_common_set_powergating_state,
+ .get_clockgating_state = vi_common_get_clockgating_state,
+};
+
+static const struct amdgpu_ip_block_version vi_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vi_common_ip_funcs,
+};
+
+void vi_set_virt_ops(struct amdgpu_device *adev)
+{
+ adev->virt.ops = &xgpu_vi_virt_ops;
+}
+
+int vi_set_ip_blocks(struct amdgpu_device *adev)
+{
+ amdgpu_device_set_sriov_virtual_display(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_TOPAZ:
+ /* topaz has no DCE, UVD, VCE */
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
+ amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ break;
+ case CHIP_FIJI:
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block);
+ if (!amdgpu_sriov_vf(adev)) {
+ amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
+ }
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block);
+ if (!amdgpu_sriov_vf(adev)) {
+ amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
+ }
+ break;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
+ break;
+ case CHIP_CARRIZO:
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
+#if defined(CONFIG_DRM_AMD_ACP)
+ amdgpu_device_ip_block_add(adev, &acp_ip_block);
+#endif
+ break;
+ case CHIP_STONEY:
+ amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
+#if defined(CONFIG_DRM_AMD_ACP)
+ amdgpu_device_ip_block_add(adev, &acp_ip_block);
+#endif
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void legacy_doorbell_index_init(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL_MEC_RING7;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL_GFX_RING0;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_DOORBELL_IH;
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_MAX_ASSIGNMENT;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
new file mode 100644
index 000000000..9718f98f8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VI_H__
+#define __VI_H__
+
+#define VI_FLUSH_GPU_TLB_NUM_WREG 3
+
+void vi_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+void vi_set_virt_ops(struct amdgpu_device *adev);
+int vi_set_ip_blocks(struct amdgpu_device *adev);
+
+void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
new file mode 100644
index 000000000..80ce42aac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -0,0 +1,514 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef VI_H
+#define VI_H
+
+#define SDMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
+#define SDMA_MAX_INSTANCE 2
+
+#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */
+
+/* crtc instance offsets */
+#define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c)
+#define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c)
+#define CRTC2_REGISTER_OFFSET (0x1f9c - 0x1b9c)
+#define CRTC3_REGISTER_OFFSET (0x419c - 0x1b9c)
+#define CRTC4_REGISTER_OFFSET (0x439c - 0x1b9c)
+#define CRTC5_REGISTER_OFFSET (0x459c - 0x1b9c)
+#define CRTC6_REGISTER_OFFSET (0x479c - 0x1b9c)
+
+/* dig instance offsets */
+#define DIG0_REGISTER_OFFSET (0x4a00 - 0x4a00)
+#define DIG1_REGISTER_OFFSET (0x4b00 - 0x4a00)
+#define DIG2_REGISTER_OFFSET (0x4c00 - 0x4a00)
+#define DIG3_REGISTER_OFFSET (0x4d00 - 0x4a00)
+#define DIG4_REGISTER_OFFSET (0x4e00 - 0x4a00)
+#define DIG5_REGISTER_OFFSET (0x4f00 - 0x4a00)
+#define DIG6_REGISTER_OFFSET (0x5400 - 0x4a00)
+#define DIG7_REGISTER_OFFSET (0x5600 - 0x4a00)
+#define DIG8_REGISTER_OFFSET (0x5700 - 0x4a00)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x17a8 - 0x17a8)
+#define AUD1_REGISTER_OFFSET (0x17ac - 0x17a8)
+#define AUD2_REGISTER_OFFSET (0x17b0 - 0x17a8)
+#define AUD3_REGISTER_OFFSET (0x17b4 - 0x17a8)
+#define AUD4_REGISTER_OFFSET (0x17b8 - 0x17a8)
+#define AUD5_REGISTER_OFFSET (0x17bc - 0x17a8)
+#define AUD6_REGISTER_OFFSET (0x17c0 - 0x17a8)
+#define AUD7_REGISTER_OFFSET (0x17c4 - 0x17a8)
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1898 - 0x1898)
+#define HPD1_REGISTER_OFFSET (0x18a0 - 0x1898)
+#define HPD2_REGISTER_OFFSET (0x18a8 - 0x1898)
+#define HPD3_REGISTER_OFFSET (0x18b0 - 0x1898)
+#define HPD4_REGISTER_OFFSET (0x18b8 - 0x1898)
+#define HPD5_REGISTER_OFFSET (0x18c0 - 0x1898)
+
+#define PIPEID(x) ((x) << 0)
+#define MEID(x) ((x) << 2)
+#define VMID(x) ((x) << 4)
+#define QUEUEID(x) ((x) << 8)
+
+#define MC_SEQ_MISC0__MT__MASK 0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
+#define MC_SEQ_MISC0__MT__DDR2 0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3 0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4 0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5 0x50000000
+#define MC_SEQ_MISC0__MT__HBM 0x60000000
+#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_CLIENT_CODE ((x) << 24) /* 0 = CP, 1 = CB, 2 = DB */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_TCL2_VOLATILE (1 << 22)
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_DEST_BASE_0_ENA (1 << 0)
+# define PACKET3_DEST_BASE_1_ENA (1 << 1)
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
+# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
+# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
+# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
+# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
+# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
+# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
+# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
+# define PACKET3_TCL1_VOL_ACTION_ENA (1 << 15)
+# define PACKET3_TC_VOL_ACTION_ENA (1 << 16) /* L2 */
+# define PACKET3_TC_WB_ACTION_ENA (1 << 18) /* L2 */
+# define PACKET3_DEST_BASE_2_ENA (1 << 19)
+# define PACKET3_DEST_BASE_3_ENA (1 << 21)
+# define PACKET3_TCL1_ACTION_ENA (1 << 22)
+# define PACKET3_TC_ACTION_ENA (1 << 23) /* L2 */
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+# define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28)
+# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - EOP events
+ * 6 - EOS events
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
+#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
+#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
+#define EOP_TCL1_ACTION_EN (1 << 16)
+#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TCL2_VOLATILE (1 << 24)
+#define EOP_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_DIS_WC (1 << 21)
+# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_ACQUIRE_MEM 0x58
+#define PACKET3_REWIND 0x59
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_CMD(x) ((x) << 28)
+ /*
+ * x=0: tmz_begin
+ * x=1: tmz_end
+ */
+#define PACKET3_SET_RESOURCES 0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
+# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
+#define PACKET3_MAP_QUEUES 0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
+# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
+# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2 */
+# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
+# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 29)
+# define PACKET3_MAP_QUEUES_ME(x) ((x) << 31)
+#define PACKET3_UNMAP_QUEUES 0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
+ /* 0 - PREEMPT_QUEUES
+ * 1 - RESET_QUEUES
+ * 2 - DISABLE_PROCESS_QUEUES
+ * 3 - PREEMPT_QUEUES_NO_UNMAP
+ */
+# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2a */
+# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
+/* CONTROL4 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define PACKET3_QUERY_STATUS 0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
+# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
+# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
+/* CONTROL2a */
+# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+
+
+#define VCE_CMD_NO_OP 0x00000000
+#define VCE_CMD_END 0x00000001
+#define VCE_CMD_IB 0x00000002
+#define VCE_CMD_FENCE 0x00000003
+#define VCE_CMD_TRAP 0x00000004
+#define VCE_CMD_IB_AUTO 0x00000005
+#define VCE_CMD_SEMAPHORE 0x00000006
+
+#define VCE_CMD_IB_VM 0x00000102
+#define VCE_CMD_WAIT_GE 0x00000106
+#define VCE_CMD_UPDATE_PTB 0x00000107
+#define VCE_CMD_FLUSH_TLB 0x00000108
+
+/* HEVC ENC */
+#define HEVC_ENC_CMD_NO_OP 0x00000000
+#define HEVC_ENC_CMD_END 0x00000001
+#define HEVC_ENC_CMD_FENCE 0x00000003
+#define HEVC_ENC_CMD_TRAP 0x00000004
+#define HEVC_ENC_CMD_IB_VM 0x00000102
+#define HEVC_ENC_CMD_WAIT_GE 0x00000106
+#define HEVC_ENC_CMD_UPDATE_PTB 0x00000107
+#define HEVC_ENC_CMD_FLUSH_TLB 0x00000108
+
+/* mmPA_SC_RASTER_CONFIG mask */
+#define RB_MAP_PKR0(x) ((x) << 0)
+#define RB_MAP_PKR0_MASK (0x3 << 0)
+#define RB_MAP_PKR1(x) ((x) << 2)
+#define RB_MAP_PKR1_MASK (0x3 << 2)
+#define RB_XSEL2(x) ((x) << 4)
+#define RB_XSEL2_MASK (0x3 << 4)
+#define RB_XSEL (1 << 6)
+#define RB_YSEL (1 << 7)
+#define PKR_MAP(x) ((x) << 8)
+#define PKR_MAP_MASK (0x3 << 8)
+#define PKR_XSEL(x) ((x) << 10)
+#define PKR_XSEL_MASK (0x3 << 10)
+#define PKR_YSEL(x) ((x) << 12)
+#define PKR_YSEL_MASK (0x3 << 12)
+#define SC_MAP(x) ((x) << 16)
+#define SC_MAP_MASK (0x3 << 16)
+#define SC_XSEL(x) ((x) << 18)
+#define SC_XSEL_MASK (0x3 << 18)
+#define SC_YSEL(x) ((x) << 20)
+#define SC_YSEL_MASK (0x3 << 20)
+#define SE_MAP(x) ((x) << 24)
+#define SE_MAP_MASK (0x3 << 24)
+#define SE_XSEL(x) ((x) << 26)
+#define SE_XSEL_MASK (0x3 << 26)
+#define SE_YSEL(x) ((x) << 28)
+#define SE_YSEL_MASK (0x3 << 28)
+
+/* mmPA_SC_RASTER_CONFIG_1 mask */
+#define SE_PAIR_MAP(x) ((x) << 0)
+#define SE_PAIR_MAP_MASK (0x3 << 0)
+#define SE_PAIR_XSEL(x) ((x) << 2)
+#define SE_PAIR_XSEL_MASK (0x3 << 2)
+#define SE_PAIR_YSEL(x) ((x) << 4)
+#define SE_PAIR_YSEL_MASK (0x3 << 4)
+
+#endif